From 9f5be553f6e475b323587ce0a5cda61491eb62f7 Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 23 Jan 2018 18:31:39 +0800 Subject: [PATCH 0001/1078] tpu contrib fix --- tensorflow/contrib/cmake/tf_core_framework.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 24d7fb82a2..129c208ecd 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -126,7 +126,9 @@ endfunction() file(GLOB_RECURSE tf_protos_cc_srcs RELATIVE ${tensorflow_source_dir} "${tensorflow_source_dir}/tensorflow/core/*.proto" "${tensorflow_source_dir}/tensorflow/contrib/boosted_trees/proto/*.proto" + "${tensorflow_source_dir}/tensorflow/contrib/tpu/proto/*.proto" ) + RELATIVE_PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS ${tensorflow_source_dir} ${tf_protos_cc_srcs} ) -- GitLab From 1c6b52927680e8bb1b1f0ebe98c5a7a7033f9af4 Mon Sep 17 00:00:00 2001 From: jackyko Date: Thu, 25 Jan 2018 14:16:53 +0800 Subject: [PATCH 0002/1078] improve cmake gpu build --- tensorflow/contrib/cmake/CMakeLists.txt | 12 ++++++++---- tensorflow/contrib/cmake/external/grpc.cmake | 11 ++++++----- tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- tensorflow/contrib/cmake/tf_core_framework.cmake | 8 ++++++-- tensorflow/core/platform/default/gpu/cupti_wrapper.h | 8 ++++---- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 817e96f5da..5f54ba5e2d 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -291,6 +291,8 @@ if (tensorflow_ENABLE_GPU) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};--include-path ${PROJECT_BINARY_DIR}/$\{build_configuration\};--expt-relaxed-constexpr) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} 
${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) + + message(STATUS ${CUDA_INCLUDE}) include_directories(${CUDA_INCLUDE}) if (WIN32) add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2) @@ -399,10 +401,12 @@ else(tensorflow_ENABLE_GPU) msvcp_dll_name=msvcp140.dll) endif(tensorflow_ENABLE_GPU) -# Find python executable -include(FindPythonInterp) -if(NOT ${PYTHONINTERP_FOUND}) - message(FATAL_ERROR "CMake was unable to find a python interpreter.") +if(tensorflow_BUILD_PYTHON_BINDINGS) + # Find python executable + include(FindPythonInterp) + if(NOT ${PYTHONINTERP_FOUND}) + message(FATAL_ERROR "CMake was unable to find a python interpreter.") + endif() endif() # Let's get to work! diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 28adb4fe84..28b85e0a19 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -21,14 +21,16 @@ set(GRPC_TAG 730b778632e79cc3c96ad237f282d687ee325ce7) if(WIN32) set(grpc_STATIC_LIBRARIES - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/Release/grpc++_unsecure.lib - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/Release/grpc_unsecure.lib - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/Release/gpr.lib) + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/$(Configuration)/grpc++_unsecure.lib + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/$(Configuration)/grpc_unsecure.lib + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/$(Configuration)/gpr.lib) else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/zlib/libz.a) endif() add_definitions(-DGRPC_ARES=0) @@ -49,7 +51,6 @@ ExternalProject_Add(grpc 
-DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS} -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} - -DgRPC_SSL_PROVIDER:STRING=NONE ) # grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index f3cf3e7044..1791dad48c 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -149,7 +149,7 @@ add_library(tf_cc OBJECT ${tf_cc_srcs}) add_dependencies(tf_cc tf_cc_framework tf_cc_ops) if (WIN32) - set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.lib") else (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") endif (WIN32) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 129c208ecd..91f8f51e5b 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -203,14 +203,18 @@ file(GLOB tf_core_platform_srcs "${tensorflow_source_dir}/tensorflow/core/framework/resource_handle.h" "${tensorflow_source_dir}/tensorflow/core/framework/resource_handle.cc") if (NOT tensorflow_ENABLE_GPU) - file(GLOB tf_core_platform_gpu_srcs + file(GLOB tf_core_platform_gpu_srcs_exclude "${tensorflow_source_dir}/tensorflow/core/platform/cuda_libdevice_path.*" "${tensorflow_source_dir}/tensorflow/core/platform/default/cuda_libdevice_path.*") - list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs}) + list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_gpu_srcs_exclude}) else() file(GLOB tf_core_platform_srcs_exclude "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc") list(REMOVE_ITEM tf_core_platform_srcs 
${tf_core_platform_srcs_exclude}) + # file(GLOB tf_core_platform_gpu_srcs + # "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/*.h" + # "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/*.cc") + # list(APPEND tf_core_platform_srcs ${tf_core_platform_gpu_srcs}) endif() file(GLOB tf_core_platform_exclude_srcs diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h index acd889e474..f1e3af12cf 100644 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h +++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.h @@ -20,11 +20,11 @@ limitations under the License. #include #include -#if defined(WIN32) +// #if defined(WIN32) #include "extras/CUPTI/include/cupti.h" -#else -#include "cuda/extras/CUPTI/include/cupti.h" -#endif +// #else +// #include "cuda/extras/CUPTI/include/cupti.h" +// #endif namespace perftools { namespace gputools { namespace profiler { -- GitLab From f8ebe607de0f71a4eace98cd110a4e931332aa6b Mon Sep 17 00:00:00 2001 From: jackyko Date: Thu, 25 Jan 2018 14:19:22 +0800 Subject: [PATCH 0003/1078] improve cmake option with python build --- tensorflow/contrib/cmake/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 5f54ba5e2d..8e736f4def 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -292,7 +292,6 @@ if (tensorflow_ENABLE_GPU) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-ftz=true) # Flush denormals to zero set(CUDA_INCLUDE ${CUDA_TOOLKIT_TARGET_DIR} ${CUDA_TOOLKIT_TARGET_DIR}/extras/CUPTI/include) - message(STATUS ${CUDA_INCLUDE}) include_directories(${CUDA_INCLUDE}) if (WIN32) add_definitions(-DGOOGLE_CUDA=1 -DTF_EXTRA_CUDA_CAPABILITIES=3.0,3.5,5.2) -- GitLab From 27ddbee4d59874a796020678f8eb45cd4b5c5ff1 Mon Sep 17 00:00:00 2001 From: jackyko Date: Thu, 25 Jan 2018 14:21:38 +0800 Subject: [PATCH 0004/1078] better 
variable name --- tensorflow/contrib/cmake/tf_core_framework.cmake | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 91f8f51e5b..e39b7f9289 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -211,10 +211,6 @@ else() file(GLOB tf_core_platform_srcs_exclude "${tensorflow_source_dir}/tensorflow/core/platform/default/device_tracer.cc") list(REMOVE_ITEM tf_core_platform_srcs ${tf_core_platform_srcs_exclude}) - # file(GLOB tf_core_platform_gpu_srcs - # "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/*.h" - # "${tensorflow_source_dir}/tensorflow/core/platform/default/gpu/*.cc") - # list(APPEND tf_core_platform_srcs ${tf_core_platform_gpu_srcs}) endif() file(GLOB tf_core_platform_exclude_srcs -- GitLab From 418657e25288945cbccf11eb4bbd4e1bcfc4b2ad Mon Sep 17 00:00:00 2001 From: jackyko Date: Fri, 26 Jan 2018 10:22:45 +0800 Subject: [PATCH 0005/1078] gcc bug fix --- tensorflow/contrib/cmake/tf_python.cmake | 17 ++++++++++++++++- .../core/platform/default/gpu/cupti_wrapper.h | 8 ++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 8862390d2b..c8b6ced79c 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -585,13 +585,28 @@ target_include_directories(pywrap_tensorflow_internal PUBLIC ${NUMPY_INCLUDE_DIR} ) -target_link_libraries(pywrap_tensorflow_internal PRIVATE +if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) + # There is a bug in GCC 5 resulting in undefined reference to a __cpu_model function when + # linking to the tensorflow library. Adding the following libraries fixes it. 
+ # See issue on github: https://github.com/tensorflow/tensorflow/issues/9593 + target_link_libraries(pywrap_tensorflow_internal PRIVATE ${tf_core_gpu_kernels_lib} ${tensorflow_EXTERNAL_LIBRARIES} tf_protos_cc tf_python_protos_cc ${PYTHON_LIBRARIES} + gcc_s + gcc ) +else() + target_link_libraries(pywrap_tensorflow_internal PRIVATE + ${tf_core_gpu_kernels_lib} + ${tensorflow_EXTERNAL_LIBRARIES} + tf_protos_cc + tf_python_protos_cc + ${PYTHON_LIBRARIES} +) +endif() if(WIN32) diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h index f1e3af12cf..acd889e474 100644 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h +++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.h @@ -20,11 +20,11 @@ limitations under the License. #include #include -// #if defined(WIN32) +#if defined(WIN32) #include "extras/CUPTI/include/cupti.h" -// #else -// #include "cuda/extras/CUPTI/include/cupti.h" -// #endif +#else +#include "cuda/extras/CUPTI/include/cupti.h" +#endif namespace perftools { namespace gputools { namespace profiler { -- GitLab From b5fb75bb77d671bfd77fb3eb5f7ac3be5604e96b Mon Sep 17 00:00:00 2001 From: jackyko Date: Fri, 26 Jan 2018 11:39:47 +0800 Subject: [PATCH 0006/1078] fix build issue with cmake --- tensorflow/core/platform/default/gpu/cupti_wrapper.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/platform/default/gpu/cupti_wrapper.h b/tensorflow/core/platform/default/gpu/cupti_wrapper.h index acd889e474..fd5968c827 100644 --- a/tensorflow/core/platform/default/gpu/cupti_wrapper.h +++ b/tensorflow/core/platform/default/gpu/cupti_wrapper.h @@ -20,10 +20,10 @@ limitations under the License. 
#include #include -#if defined(WIN32) -#include "extras/CUPTI/include/cupti.h" -#else +#if defined(PLATFORM_GOOGLE) #include "cuda/extras/CUPTI/include/cupti.h" +#else +#include "extras/CUPTI/include/cupti.h" #endif namespace perftools { namespace gputools { -- GitLab From cf55a3eb11df0ad5de84202ba9805977c5301ca5 Mon Sep 17 00:00:00 2001 From: jackyko Date: Fri, 26 Jan 2018 16:15:52 +0800 Subject: [PATCH 0007/1078] add missing ops --- tensorflow/contrib/cmake/tf_core_ops.cmake | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index 138993db35..15b0227335 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -15,7 +15,7 @@ set(tf_op_lib_names "audio_ops" "array_ops" - "batch_ops" + "batch_ops" "bitwise_ops" "candidate_sampling_ops" "checkpoint_ops" @@ -23,11 +23,12 @@ set(tf_op_lib_names "ctc_ops" "data_flow_ops" "dataset_ops" + "function_ops" "functional_ops" "image_ops" "io_ops" "linalg_ops" - "list_ops" + "list_ops" "lookup_ops" "logging_ops" "math_ops" @@ -46,8 +47,9 @@ set(tf_op_lib_names "state_ops" "stateless_random_ops" "string_ops" - "summary_ops" + "summary_ops" "training_ops" + "word2vec_ops" ) foreach(tf_op_lib_name ${tf_op_lib_names}) -- GitLab From ab59f29dc8f930443881a1e27f7080d8ba87b119 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Fri, 26 Jan 2018 21:52:02 +0800 Subject: [PATCH 0008/1078] fix for cmake gui build --- tensorflow/contrib/cmake/tf_cc_ops.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/tf_cc_ops.cmake b/tensorflow/contrib/cmake/tf_cc_ops.cmake index f3cf3e7044..1791dad48c 100644 --- a/tensorflow/contrib/cmake/tf_cc_ops.cmake +++ b/tensorflow/contrib/cmake/tf_cc_ops.cmake @@ -149,7 +149,7 @@ add_library(tf_cc OBJECT ${tf_cc_srcs}) add_dependencies(tf_cc tf_cc_framework tf_cc_ops) if (WIN32) - set (pywrap_tensorflow_lib 
"${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/pywrap_tensorflow_internal.lib") + set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/$(Configuration)/pywrap_tensorflow_internal.lib") else (WIN32) set (pywrap_tensorflow_lib "${CMAKE_CURRENT_BINARY_DIR}/libpywrap_tensorflow_internal.so") endif (WIN32) -- GitLab From 3b316fc9d9468d957d4866e0397d4455bd3516bb Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sat, 27 Jan 2018 12:26:21 +0800 Subject: [PATCH 0009/1078] add version --- tensorflow/contrib/cmake/CMakeLists.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 25d5dedb0e..c26c7cd3b7 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5) # Project -project(tensorflow C CXX) +project(tensorflow VERSION 1.5.0 LANGUAGES C CXX) # Set C++14 as standard for the whole project set(CMAKE_CXX_STANDARD 14) @@ -134,9 +134,6 @@ if(WIN32) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /D_ITERATOR_DEBUG_LEVEL=0") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /D_ITERATOR_DEBUG_LEVEL=0") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /D_ITERATOR_DEBUG_LEVEL=0") - - # Try to avoid flaky failures due to failed generation of generate.stamp files. 
- set(CMAKE_SUPPRESS_REGENERATION ON) endif() if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") -- GitLab From d6b7077d2dfd2d93410fc77ac3c14064f73ae933 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sat, 27 Jan 2018 13:53:04 +0800 Subject: [PATCH 0010/1078] tensorflow can be used by cmake find_package function --- .../contrib/cmake/TensorflowConfig.cmake.in | 16 ++++ .../cmake/TensorflowConfigVersion.cmake.in | 11 +++ tensorflow/contrib/cmake/tf_shared_lib.cmake | 80 +++++++++++++++++-- 3 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 tensorflow/contrib/cmake/TensorflowConfig.cmake.in create mode 100644 tensorflow/contrib/cmake/TensorflowConfigVersion.cmake.in diff --git a/tensorflow/contrib/cmake/TensorflowConfig.cmake.in b/tensorflow/contrib/cmake/TensorflowConfig.cmake.in new file mode 100644 index 0000000000..cc04db6e95 --- /dev/null +++ b/tensorflow/contrib/cmake/TensorflowConfig.cmake.in @@ -0,0 +1,16 @@ +# - Config file for the Tensorflow package +# It defines the following variables +# TENSORFLOW_INCLUDE_DIRS - include directories for Tensorflow +# TENSORFLOW_LIBRARIES - libraries to link against + +# Compute paths +get_filename_component(TENSORFLOW_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +set(TENSORFLOW_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") + +# Our library dependencies (contains definitions for IMPORTED targets) +if(NOT TENSORFLOW_BINARY_DIR) + include("${TENSORFLOW_CMAKE_DIR}/TensorflowTargets.cmake") +endif() + +# These are IMPORTED targets created by TensorflowTargets.cmake +set(TENSORFLOW_LIBRARIES tensorflow) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/TensorflowConfigVersion.cmake.in b/tensorflow/contrib/cmake/TensorflowConfigVersion.cmake.in new file mode 100644 index 0000000000..2a9609ddb9 --- /dev/null +++ b/tensorflow/contrib/cmake/TensorflowConfigVersion.cmake.in @@ -0,0 +1,11 @@ +set(PACKAGE_VERSION "@TENSORFLOW_VERSION@") + +# Check whether the requested PACKAGE_FIND_VERSION is compatible 
+if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_COMPATIBLE FALSE) +else() + set(PACKAGE_VERSION_COMPATIBLE TRUE) + if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") + set(PACKAGE_VERSION_EXACT TRUE) + endif() +endif() \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 571d2b0dec..a4a25dd557 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -91,6 +91,27 @@ if(CMAKE_COMPILER_IS_GNUCC AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) target_link_libraries(tensorflow PRIVATE gcc_s gcc) endif() +# Offer the user the choice of overriding the installation directories +set(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") +set(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") +set(INSTALL_INCLUDE_DIR include CACHE PATH + "Installation directory for header files") +if(WIN32 AND NOT CYGWIN) + set(DEF_INSTALL_CMAKE_DIR cmake) +else() + set(DEF_INSTALL_CMAKE_DIR lib/cmake) +endif() +set(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH + "Installation directory for CMake files") + +# Make relative paths absolute (needed later on) +foreach(p LIB BIN INCLUDE CMAKE) + set(var INSTALL_${p}_DIR) + if(NOT IS_ABSOLUTE "${${var}}") + set(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") + endif() +endforeach() + if(WIN32) add_dependencies(tensorflow tensorflow_static) endif(WIN32) @@ -99,14 +120,57 @@ target_include_directories(tensorflow PUBLIC $ $) -install(TARGETS tensorflow EXPORT tensorflow_export - RUNTIME DESTINATION bin - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib) +# Add all targets to build-tree export set +export(TARGETS tensorflow + FILE ${PROJECT_BINARY_DIR}/TensorflowTargets.cmake) + +# Export the package for use from the build-tree +export(PACKAGE Tensorflow) + +# Create the TensorflowConfig.cmake and 
TensorflowConfigVersion files +file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" + "${INSTALL_INCLUDE_DIR}") +# for the build tree +set(CONF_INCLUDE_DIRS "${tensorflow_source_dir}" + "${PROJECT_BINARY_DIR}" + "${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src" + "${CMAKE_CURRENT_BINARY_DIR}/nsync/install/include" # Please if there is a better directory + "${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/Eigen/" + "${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive/" + "${tensorflow_source_dir}/third_party/eigen3/" + "${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/") +configure_file(TensorflowConfig.cmake.in + "${PROJECT_BINARY_DIR}/TensorflowConfig.cmake" @ONLY) +# for the install tree, yet to be complete +set(CONF_INCLUDE_DIRS "\${TENSORFLOW_CMAKE_DIR}/${REL_INCLUDE_DIR}") +configure_file(TensorflowConfig.cmake.in + "${PROJECT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/TensorflowConfig.cmake" @ONLY) +# for both +configure_file(TensorflowConfigVersion.cmake.in + "${PROJECT_BINARY_DIR}/TensorflowConfigVersion.cmake" @ONLY) + +# install(TARGETS tensorflow EXPORT tensorflow_export +# RUNTIME DESTINATION ${INSTALL_BIN_DIR} +# LIBRARY DESTINATION ${INSTALL_LIB_DIR} +# ARCHIVE DESTINATION ${INSTALL_LIB_DIR}) + +# install(EXPORT tensorflow_export +# FILE TensorflowConfig.cmake +# DESTINATION ${INSTALL_CMAKE_DIR}) -install(EXPORT tensorflow_export - FILE TensorflowConfig.cmake - DESTINATION lib/cmake) +install(FILES + "${PROJECT_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/TensorflowConfig.cmake" + "${PROJECT_BINARY_DIR}/TensorflowConfigVersion.cmake" + DESTINATION "${INSTALL_CMAKE_DIR}" COMPONENT dev) + +# install the export set for use with the install-tree +install(EXPORT TensorflowTargets + DESTINATION ${INSTALL_CMAKE_DIR}) + +install(TARGETS tensorflow EXPORT TensorflowTargets + RUNTIME DESTINATION ${INSTALL_BIN_DIR} + LIBRARY DESTINATION ${INSTALL_LIB_DIR} + ARCHIVE DESTINATION ${INSTALL_LIB_DIR}) # install necessary headers # tensorflow headers @@ -144,4 
+208,4 @@ install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ DESTINATION include/third_party/eigen3) # unsupported Eigen directory install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ - DESTINATION include/unsupported/Eigen) + DESTINATION include/unsupported/Eigen) \ No newline at end of file -- GitLab From 6dd6cc95087066274eccc0205043f1c657c8a558 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sun, 28 Jan 2018 17:26:22 +0800 Subject: [PATCH 0011/1078] readme add cmake gui build and c++ interface --- tensorflow/contrib/cmake/README.md | 160 ++++++++++++++++++++++++++--- 1 file changed, 147 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 8f85a75ee4..f6497c2138 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -2,9 +2,9 @@ TensorFlow CMake build ====================== This directory contains CMake files for building TensorFlow on Microsoft -Windows. [CMake](https://cmake.org) is a cross-platform tool that can +Windows and Linux. [CMake](https://cmake.org) is a cross-platform tool that can generate build scripts for multiple build systems, including Microsoft -Visual Studio. +Visual Studio and GCC. The method suppose could work on MacOS, but not tested yet. **N.B.** We provide Linux build instructions primarily for the purpose of testing the build. We recommend using the standard Bazel-based build on @@ -13,12 +13,13 @@ Linux. Current Status -------------- -CMake can be used to build TensorFlow on Windows. See the [getting started documentation](https://www.tensorflow.org/install/install_windows) -for instructions on how to install a pre-built TensorFlow package on Windows. +CMake can be used to build TensorFlow on all platforms. See the [getting started documentation](https://www.tensorflow.org/install/install_windows) +for instructions on how to install a pre-built TensorFlow package on Windows and Linux. 
Procedure in MacOS is similar to the Linux build. ### Current known limitations * It is not possible to load a custom Op library. * GCS file system is not supported. +* Debug build is not available since Python no longer releases a debug library. ## Building with CMake @@ -35,7 +36,7 @@ bindings. * [SWIG](http://www.swig.org/download.html) * Additional pre-requisites for Microsoft Windows: - - Visual Studio 2015 + - Visual Studio 2015 (latest version of MSVC 2017 is not supported by CUDA yet, try it on your own) - Python 3.5 - NumPy 1.11.0 or later @@ -47,12 +48,12 @@ bindings. ### Known-good configurations * Microsoft Windows 10 - - Microsoft Visual Studio Enterprise 2015 with Visual C++ 2015 + - Microsoft Visual Studio Enterprise/Community 2015 with Visual C++ 2015 - [Anaconda 4.1.1 (Python 3.5 64-bit)](https://www.anaconda.com/download/) - [Git for Windows version 2.9.2.windows.1](https://git-scm.com/download/win) - [swigwin-3.0.10](http://www.swig.org/download.html) - - [NVidia CUDA Toolkit 8.0](https://developer.nvidia.com/cuda-downloads) - - [NVidia CUDNN 5.1](https://developer.nvidia.com/cudnn) + - [NVidia CUDA Toolkit 9.0](https://developer.nvidia.com/cuda-downloads) + - [NVidia CUDNN 7](https://developer.nvidia.com/cudnn) - [CMake 3.6](https://cmake.org/files/v3.6/cmake-3.6.3-win64-x64.msi) * Ubuntu 14.04 @@ -60,8 +61,8 @@ bindings. - Docker 1.9.1 (for automated testing) ### Current known limitations - - The Python package supports **Python 3.5 only**, because that is the only - version for which standard Python binaries exist and those binaries are + - The Python package supports **Python 3.5/3.6 only**, because these are the only + versions for which standard Python binaries exist and those binaries are compatible with the TensorFlow runtime. (On Windows, the standard Python binaries for versions earlier than 3.5 were compiled with older compilers that do not have all of the features (e.g. 
C++11 support) needed to compile @@ -98,8 +99,141 @@ We are actively working on improving CMake and Windows support, and addressing these limitations. We would appreciate pull requests that implement missing ops or APIs. +CMake GUI build (all platforms) +================================== +Install from CMake GUI would be a convenient way to generate C++ build projects. The software supports Windows, MacOS and Linux, while the posix platform provides an extra ccmake binary to run command line GUI. The working principle of cmake, ccmake and cmake-gui is the same; the only difference is the interface each provides for project configuration and dependency setting. + +0. Pre-build checklist: + * Compiler (GCC for Linux, MSVC for Windows) + * Make sure compiler directory has been set to system path + * CUDA 9.0 (GPU build) + * CUDNN (GPU build) + * NCCL (GPU build on Linux) + * SWIG (python binding) +1. Start CMake GUI +2. Click on `Browse Source` and navigate to the folder `/tensorflow/contrib/cmake` +3. Click on `Browse Build` and specify the location where you want tensorflow to be built +4. Click on `Configure`, a new window will be prompted out, specify the generator mode for the project generation. For Windows, choose `Visual Studio Win64`, for Linux, choose `Unix Makefiles`, then press `Finish`. Wait for a moment, the default project dependencies will be generated automatically. +5. There are a few options with which you can customize your own build. **The setting here is crucial for a successful build, please check all items carefully.** + * `tensorflow_BUILD_ALL_KERNELS` should always be `on` + * `tensorflow_BUILD_CC_EXAMPLE` is default to be `on`. This can help you to test build (optional) + * `tensorflow_BUILD_CONTRIB_KERNELS` is default to be `on`, but it won't affect tensorflow function, turn it to `off` if you want a slim build. (optional) + * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. 
If SWIG is not in system path, you need set it manually. (optional) + * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) + * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows you may turn it to `off`. + * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` + * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` + * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` + * `CMAKE_INSTALL_PREFIX` is the location where the final package will be installed. You may change it to you own preferred path (optional) + + ##### Current known bug + * `WIN_CPU_SIMD_OPTIONS` cannot be turned on now. Going to be fix +6. After changing the configuration in step 5, press `Configure` again +7. If not error is found, press `Generate` + +#### Windows + +1. Open `tensorflow.sln` in the build folder (Windows). Change build type from `Debug` to `Release`. Choose `Build`->`Build Solution`. This may take more than hours of compilation. If everything is alright, the output window would show no error. + + ##### Python + In solution explorer, right click on `tf_python_build_pip_package` -> `build`. It will generate the wheel file in `/tf_python/dist`. Install with following command: + + ```pip install --upgrade tensorflow-.whl``` + + ***The wheel name varies depends on you config. Change to your own wheel filename.*** + + Remind that some pip installation requires administrator right command prompt. + + ##### C++ + You can directly use the build folder tree for C++ interface with cmake. If you want to do installation for api releasing, right click on `Install` -> `build`. 
The headers and library will be installed in the directory specify by `CMAKE_INSTALL_PREFIX` during configuration. + +2. For smaller RAM computer, it is noticed that out of heap space error appears. Change to command prompt build is an alternative to do step 1. + + Open `VS2015 x64 Native Tools Command Prompt`. You can open it by press `Start`, then type the binary name. Use `VS2017 x64 Native Tools Command Prompt` if you are using MSVC 2017. + + ##### Python + Directly build python wheel package by following command: + ```MSBuild /p:Configuration=Release ``` + + Remember to change `` to the actual path of the file, it can be found at the root of build directory + + Install the wheel file generated as instructed by step 1. + + ##### C++ interface + Build from VS native toolchain with following command: + ```MSBuild /p:Configuration=Release ``` + + Headers are discretely located in the build folders. Tensorflow library can be found at `/Release`, namely `tensorflow.dll` and `tensorflow.lib`. + + * Build to install for api release (optional): + ```MSBuild /p:Configuration=Release ``` + + Remember to change `` and `` to the actual path of the file, it can be found at the root of build directory. + +#### Linux/MacOS (command line GNU build) + +1. Open the terminal, change working directory to the one specified in step 3. + +2. Type the following command: + + ```make -sj all``` + + ##### Python + + ``` + make -sj tf_python_build_pip_package + cd tf_python + pip install --upgrade tensorflow-.whl + ``` + + ##### C++ interface + + ```make -sj install``` + + Where `` is the threads used for the compilation, change to any integer less or equal to your computer's maxiumum thread number. + + Headers are discretely located in the build folders. Tensorflow library can be found at ``, namely `tensorflow.so` (Linux) or `tensorflow.dylib` (MacOS). 
+ +#### Start a Tensorflow C++ project with CMake +Here we assume that you have basic knowledge on gathering dependency with `CMakeLists.txt`. Here we introduce how the C++ api works with [official hello world tutorial](https://www.tensorflow.org/api_guides/cc/guide). + +1. Create a new working directory and create a new text file named `CMakeLists.txt` and the c++ file `main.cxx` +2. Fill in the `main.cxx` with the code provided in [official c++ api basic](https://www.tensorflow.org/api_guides/cc/guide). +3. Fill in the `CMakeLists.txt` with following code: + ``` cmake + cmake_minimum_required (VERSION 2.6) + project (tf_hello) + + # Tensorflow + find_package(Tensorflow REQUIRED) + include_directories(${TENSORFLOW_INCLUDE_DIRS}) + + # compiler setting required by tensorflow, to be tested on all compilers + # currently only tested on MSVC and GCC + if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) + add_definitions(-DCOMPILER_MSVC) + elseif (${CMAKE_CXX_COMPILER_ID} STREQUAL GNU) + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS "3") + add_definitions(-DCOMPILER_GCC3) + else() + add_definitions(-D__GNUC__) + endif() + else() + message(ERROR " compiler ${CMAKE_CXX_COMPILER_ID} not supported by this CMakeList.txt, under development") + endif() + + add_executable(tf_hello main.cxx) + target_link_libraries(tf_hello ${TENSORFLOW_LIBRARIES}) + ``` +4. Configure the folder with cmake-gui, an error should be prompted out, requesting you to locate the folder containing `TensorflowConfig.cmake`. This file can be found at `` or `` (for those have build install in previous steps). + +5. Configure again, generate the project. +6. Compile the project with `Release` config (Windows). For Linux users, just compile the project. +7. Copy the `tensorflow.dll`(Windows)/`tensorflow.so`(Linux) from build directory to the build folder containing `tf_hello` binary. +8. Run `tf_hello` binary -Step-by-step Windows build +Step-by-step Windows build (command prompt) ========================== 1. 
Install the pre-requisites detailed above, and set up your environment. @@ -122,7 +256,7 @@ Step-by-step Windows build For example: ``` - D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\bin + D:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\bin D:\local\cuda\bin ``` @@ -261,4 +395,4 @@ $ cd tensorflow $ tensorflow/tools/ci_build/ci_build.sh CMAKE tensorflow/tools/ci_build/builds/cmake.sh ``` -That's it. Dependencies included. +That's it. Dependencies included. \ No newline at end of file -- GitLab From b18673c9921f3f692c4c8f4fb881769d392f656d Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sun, 28 Jan 2018 17:41:28 +0800 Subject: [PATCH 0012/1078] readme grpc update --- tensorflow/contrib/cmake/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index f6497c2138..633d61a80e 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -121,7 +121,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. If SWIG is not in system path, you need set it manually. (optional) * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) - * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows you may turn it to `off`. + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows you need to turn it to `off`. 
* `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` @@ -333,7 +333,7 @@ Step-by-step Windows build (command prompt) * `-Dtensorflow_ENABLE_GRPC_SUPPORT=(ON|OFF)`. Defaults to `ON`. Include gRPC support and the distributed client and server code in the TensorFlow - runtime. + runtime. * `-Dtensorflow_ENABLE_SSL_SUPPORT=(ON|OFF)`. Defaults to `OFF`. Include SSL support (for making secure HTTP requests) in the TensorFlow runtime. -- GitLab From 51b8978fe2697881d004d736460fde5ef0eb8a2c Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sun, 28 Jan 2018 20:46:58 +0800 Subject: [PATCH 0013/1078] fix for cmake build --- tensorflow/core/util/cuda_device_functions.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index f787687f66..a994398c42 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -28,12 +28,21 @@ limitations under the License. 
#include #include +#ifdef GOOGLE_PLATFORM #include "cuda/include/cuda.h" #include "cuda/include/device_functions.h" +#else +#include "cuda.h" +#include "device_functions.h" +#endif #include "tensorflow/core/platform/types.h" #if CUDA_VERSION >= 7050 +#ifdef GOOGLE_PLATFORM #include "cuda/include/cuda_fp16.h" +#else +#include "cuda_fp16.h" +#endif #endif // CUDA_VERSION >= 7050 namespace tensorflow { -- GitLab From f6911068262751adba24a26344fed8394d042a07 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sun, 28 Jan 2018 20:55:20 +0800 Subject: [PATCH 0014/1078] cmake cuda fix --- tensorflow/core/util/cuda_device_functions.h | 4 ++-- tensorflow/core/util/cuda_launch_config.h | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index a994398c42..66129d2425 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -28,7 +28,7 @@ limitations under the License. #include #include -#ifdef GOOGLE_PLATFORM +#ifdef PLATFORM_GOOGLE #include "cuda/include/cuda.h" #include "cuda/include/device_functions.h" #else @@ -38,7 +38,7 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #if CUDA_VERSION >= 7050 -#ifdef GOOGLE_PLATFORM +#ifdef PLATFORM_GOOGLE #include "cuda/include/cuda_fp16.h" #else #include "cuda_fp16.h" diff --git a/tensorflow/core/util/cuda_launch_config.h b/tensorflow/core/util/cuda_launch_config.h index 3ea33ee6cf..7e552c7dce 100644 --- a/tensorflow/core/util/cuda_launch_config.h +++ b/tensorflow/core/util/cuda_launch_config.h @@ -21,7 +21,11 @@ limitations under the License. 
#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#ifdef PLATFORM_GOOGLE #include "cuda/include/cuda.h" +#else +#include "cuda.h" +#endif #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" -- GitLab From 0e5faafaa3977584efd7ab6aab114f8c0689c247 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Mon, 29 Jan 2018 00:24:09 +0800 Subject: [PATCH 0015/1078] update protobuf to 3.5.1.1 --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index aedb793d2a..31a13b980e 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) +set(PROTOBUF_TAG 860bd12fec5c69e6529565165532b3d5108a7d97) if(WIN32) set(protobuf_STATIC_LIBRARIES -- GitLab From cf101a9cd64dc9f101a27c45ae2a52e4630b6fb7 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Mon, 29 Jan 2018 00:50:21 +0800 Subject: [PATCH 0016/1078] windows SMID option imporve --- tensorflow/contrib/cmake/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index c26c7cd3b7..a7e6e8262d 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -31,10 +31,15 @@ option(tensorflow_BUILD_PYTHON_TESTS "Build python unit tests " OFF) option(tensorflow_BUILD_MORE_PYTHON_TESTS "Build more python unit tests for contrib packages" OFF) option(tensorflow_BUILD_SHARED_LIB "Build TensorFlow as a shared library" OFF) 
option(tensorflow_OPTIMIZE_FOR_NATIVE_ARCH "Enable compiler optimizations for the native processor architecture (if available)" ON) -option(tensorflow_WIN_CPU_SIMD_OPTIONS "Enables CPU SIMD instructions") option(tensorflow_ENABLE_SNAPPY_SUPPORT "Enable SNAPPY compression support" ON) option(tensorflow_DISABLE_EIGEN_FORCEINLINE "Disable forceinline, to speed up build on windows." OFF) +# enable SIMD instructions with MSVC, as AVX and SSE +if (WIN32) +SET(tensorflow_WIN_CPU_SIMD_OPTIONS "/arch:AVX" CACHE STRING "Enables CPU SIMD instructions") +SET_PROPERTY(CACHE tensorflow_WIN_CPU_SIMD_OPTIONS PROPERTY STRINGS /arch:AVX) +endif() + # GPU, CUDA and cuDNN options option(tensorflow_ENABLE_GPU "Enable GPU support" OFF) set(tensorflow_CUDA_VERSION "9.0" CACHE STRING "CUDA version to build against") -- GitLab From 9b8ecce73d4ad7d5753ecabe9c511566b31f120a Mon Sep 17 00:00:00 2001 From: JackyKo Date: Mon, 29 Jan 2018 00:52:45 +0800 Subject: [PATCH 0017/1078] GRPC updated --- tensorflow/contrib/cmake/README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 633d61a80e..dc75250265 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -121,14 +121,12 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. If SWIG is not in system path, you need set it manually. (optional) * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) - * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. 
For Linux build, this option must always be `on`. In Windows you need to turn it to `off`. + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows this need to be `on` for gpu build. * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` * `CMAKE_INSTALL_PREFIX` is the location where the final package will be installed. You may change it to you own preferred path (optional) - ##### Current known bug - * `WIN_CPU_SIMD_OPTIONS` cannot be turned on now. Going to be fix 6. After changing the configuration in step 5, press `Configure` again 7. If not error is found, press `Generate` -- GitLab From 9284b3cc7782dd7d7f9459f7beec7cddec1c3f9a Mon Sep 17 00:00:00 2001 From: JackyKo Date: Mon, 29 Jan 2018 01:21:32 +0800 Subject: [PATCH 0018/1078] python build in linux failed --- tensorflow/contrib/cmake/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index dc75250265..a18a0c8e37 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -179,6 +179,10 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. ##### Python + **Important Note** CMake generated python wheel for Linux/MacOs is currently under development. Please use bazel build. + + Follow code is an expected Linux/MacOS python package build after development work is completed. 
+ ``` make -sj tf_python_build_pip_package cd tf_python -- GitLab From 4219092017be9523cb2be7106111b1a4a1016d0c Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 11:33:04 +0800 Subject: [PATCH 0019/1078] grpc require compiler update --- tensorflow/contrib/cmake/README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index a18a0c8e37..ba5f4e58c5 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -35,6 +35,12 @@ bindings. * [SWIG](http://www.swig.org/download.html) +* [Perl](https://www.perl.org/get.html) + +* [Go](https://golang.org/) + +* [NASM](http://www.nasm.us/) + * Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 (latest version of MSVC 2017 is not suppored by CUDA yet, try it on your own) - Python 3.5 @@ -104,12 +110,16 @@ CMake GUI build (all platforms) Install from CMake GUI would be a convenient way to generate C++ build projects. The software supports Windows, MacOS and Linux, while the posix platform provides an extra ccmake binary to run command line GUI. Both working principal of cmake, ccmake and cmake-gui are the same, the only difference is by providing suitable interface for project configuration and dependency setting. 0. Pre-buid checklist: + The following binary/libraries should be setted in system path, otherwise you need to set manualy via cmake. * Compiler (GCC for Linux, MSVC for Windows) * Make sure compiler directory has been set to system path * CUDA 9.0 (GPU build) * CUDNN (GPU build) * NCCL (GPU build on Linux) * SWIG (python binding) + * Perl (GPU build on Windows, required by grpc) + * Go (GPU build on Windows, required by grpc) + * NASM (GPU build on Windows, required by grpc) 1. Start CMake GUI 2. Click on `Browse Source` and direct to the the folder `/tensorflow/contrib/cmake` 3. 
Click on `Browse Build` and spectify a location that you want tensorflow to be build @@ -121,7 +131,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. If SWIG is not in system path, you need set it manually. (optional) * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) - * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows this need to be `on` for gpu build. + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. In Windows this need to be `on` for gpu build. Reminded that Perl, Go and NASM is required for this option in windows. * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` @@ -135,6 +145,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. 1. Open `tensorflow.sln` in the build folder (Windows). Change build type from `Debug` to `Release`. Choose `Build`->`Build Solution`. This may take more than hours of compilation. If everything is alright, the output window would show no error. ##### Python + In solution explorer, right click on `tf_python_build_pip_package` -> `build`. It will generate the wheel file in `/tf_python/dist`. Install with following command: ```pip install --upgrade tensorflow-.whl``` @@ -144,6 +155,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. 
Remind that some pip installation requires administrator right command prompt. ##### C++ + You can directly use the build folder tree for C++ interface with cmake. If you want to do installation for api releasing, right click on `Install` -> `build`. The headers and library will be installed in the directory specify by `CMAKE_INSTALL_PREFIX` during configuration. 2. For smaller RAM computer, it is noticed that out of heap space error appears. Change to command prompt build is an alternative to do step 1. @@ -151,7 +163,9 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. Open `VS2015 x64 Native Tools Command Prompt`. You can open it by press `Start`, then type the binary name. Use `VS2017 x64 Native Tools Command Prompt` if you are using MSVC 2017. ##### Python + Directly build python wheel package by following command: + ```MSBuild /p:Configuration=Release ``` Remember to change `` to the actual path of the file, it can be found at the root of build directory -- GitLab From 4dd3d50cf8f1dcec7f4a4268b50bcea7c395a9a2 Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 13:34:04 +0800 Subject: [PATCH 0020/1078] add perl, NASM and Go request --- tensorflow/contrib/cmake/external/grpc.cmake | 25 ++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 28b85e0a19..03205f58ca 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -14,6 +14,29 @@ # ============================================================================== include (ExternalProject) +if (WIN32) + # perl + find_package(Perl REQUIRED) + + # nasm + if(NOT NASM_COMPILER) + find_program(NASM_COMPILER nasm + "$ENV{ProgramFiles}/NASM" DOC "path to NASM.exe") + endif() + if(NOT NASM_COMPILER) + message(FATAL_ERROR "NASM not found!" 
DOC "path to go.exe") + endif() + + # go + if(NOT GO) + find_program(GO go + "$ENV{ProgramFiles}/go") + endif() + if(NOT GO) + message(FATAL_ERROR "Go not found!") + endif() +endif() + set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) @@ -51,6 +74,8 @@ ExternalProject_Add(grpc -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS} -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} + -DCMAKE_ASM_NASM_COMPILER:STRING=${NASM_COMPILER} + -DGO_EXECUTABLE:STRING=${GO} ) # grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. -- GitLab From 6b6d24015f16bd150571920c013b5fd6f3d07f4b Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 13:41:33 +0800 Subject: [PATCH 0021/1078] search path for go update --- tensorflow/contrib/cmake/external/grpc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 03205f58ca..944c49e48a 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -30,7 +30,7 @@ if (WIN32) # go if(NOT GO) find_program(GO go - "$ENV{ProgramFiles}/go") + "$ENV{ProgramFiles}/Go/bin") endif() if(NOT GO) message(FATAL_ERROR "Go not found!") -- GitLab From 4ea80c7e96ab059360faec42193ec67ef24fdbec Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 16:43:10 +0800 Subject: [PATCH 0022/1078] version update to 1.5.0 --- tensorflow/tools/pip_package/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 62df6453fb..306bfab161 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version 
string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.5.0-rc1' +_VERSION = '1.5.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 8e337006a72f6316a4afa7f1173f819b6baca1ba Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 16:45:50 +0800 Subject: [PATCH 0023/1078] install package add abseil for grpc support --- tensorflow/tools/pip_package/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 306bfab161..083d002c83 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -37,9 +37,10 @@ REQUIRED_PACKAGES = [ 'gast >= 0.2.0', 'numpy >= 1.12.1', 'six >= 1.10.0', - 'protobuf >= 3.4.0', + 'protobuf >= 3.5.0', 'tensorflow-tensorboard >= 0.4.0', 'termcolor >= 1.1.0', + 'absl-py >= 0.1.9' ] project_name = 'tensorflow' -- GitLab From 4a35d10279b8b28d56a83a5ea05879d2cac16bb5 Mon Sep 17 00:00:00 2001 From: jackyko Date: Mon, 29 Jan 2018 17:38:17 +0800 Subject: [PATCH 0024/1078] linux gpu build fix --- tensorflow/contrib/cmake/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index a7e6e8262d..3ec630b3d6 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -401,8 +401,12 @@ if (tensorflow_ENABLE_GPU) cudnn_version_number=${tensorflow_CUDNN_VERSION}) endif(WIN32) else(tensorflow_ENABLE_GPU) - set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value - msvcp_dll_name=msvcp140.dll) + if(WIN32) + set(tensorflow_BUILD_INFO_FLAGS --build_config cpu --key_value + msvcp_dll_name=msvcp140.dll) + else() + set(tensorflow_BUILD_INFO_FLAGS --build_config cpu) + endif() endif(tensorflow_ENABLE_GPU) if(tensorflow_BUILD_PYTHON_BINDINGS) -- GitLab 
From 547731fe8ffcc994870dda4d70eeaf8a0686427c Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 30 Jan 2018 11:16:05 +0800 Subject: [PATCH 0025/1078] add linux support --- tensorflow/contrib/cmake/external/grpc.cmake | 55 +++++++++++++------- 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 944c49e48a..f3119aa749 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -14,28 +14,45 @@ # ============================================================================== include (ExternalProject) -if (WIN32) - # perl - find_package(Perl REQUIRED) +# perl +find_package(Perl REQUIRED) - # nasm - if(NOT NASM_COMPILER) - find_program(NASM_COMPILER nasm - "$ENV{ProgramFiles}/NASM" DOC "path to NASM.exe") - endif() - if(NOT NASM_COMPILER) - message(FATAL_ERROR "NASM not found!" DOC "path to go.exe") +# nasm +if(NOT CMAKE_ASM_NASM_COMPILER) + if (WIN32) + find_program(CMAKE_ASM_NASM_COMPILER + NAMES nasm + PATHS "$ENV{ProgramFiles}/NASM" + DOC "path to NASM compiler") + else() + find_program(CMAKE_ASM_NASM_COMPILER + NAMES nasm + PATHS "/usr/bin" + DOC "path to NASM compiler") endif() +endif() + +if(NOT NASM_COMPILER) + message(FATAL_ERROR "NASM not found!") +endif() - # go - if(NOT GO) - find_program(GO go - "$ENV{ProgramFiles}/Go/bin") - endif() - if(NOT GO) - message(FATAL_ERROR "Go not found!") +# go +if(NOT GO_EXECUTABLE) + if (WIN32) + find_program(GO_EXECUTABLE + NAMES go + PATHS "$ENV{ProgramFiles}/Go/bin" + DOC "path to go compiler") + else() + find_program(GO_EXECUTABLE + NAMES go + PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" + DOC "path to go compiler") endif() endif() +if(NOT GO_EXECUTABLE) + message(FATAL_ERROR "Go not found!") +endif() set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) @@ -74,8 +91,8 @@ 
ExternalProject_Add(grpc -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS} -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} - -DCMAKE_ASM_NASM_COMPILER:STRING=${NASM_COMPILER} - -DGO_EXECUTABLE:STRING=${GO} + -DCMAKE_ASM_NASM_COMPILER:STRING=${CMAKE_ASM_NASM_COMPILER} + -DGO_EXECUTABLE:STRING=${GO_EXECUTABLE} ) # grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. -- GitLab From 70127d3891ac0dcf291727eee91dfca6266bd6ee Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 30 Jan 2018 11:17:51 +0800 Subject: [PATCH 0026/1078] nasm cmake fix --- tensorflow/contrib/cmake/external/grpc.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index f3119aa749..4a2989a945 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -32,7 +32,7 @@ if(NOT CMAKE_ASM_NASM_COMPILER) endif() endif() -if(NOT NASM_COMPILER) +if(NOT CMAKE_ASM_NASM_COMPILER) message(FATAL_ERROR "NASM not found!") endif() -- GitLab From eb77281b907075ef7cc3699f9222b839527a3b6b Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 30 Jan 2018 14:03:09 +0800 Subject: [PATCH 0027/1078] change to version 3.5.1 --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index 31a13b980e..269f1c5c89 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 860bd12fec5c69e6529565165532b3d5108a7d97) +set(PROTOBUF_TAG 106ffc04be1abf3ff3399f54ccf149815b287dd9) if(WIN32) 
set(protobuf_STATIC_LIBRARIES -- GitLab From ae797531d6ea09b6fe8a30cada74be5dffbbac12 Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 30 Jan 2018 14:05:37 +0800 Subject: [PATCH 0028/1078] change to 3.5.0 --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index 269f1c5c89..d6fdb37406 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 106ffc04be1abf3ff3399f54ccf149815b287dd9) +set(PROTOBUF_TAG 2761122b810fe8861004ae785cc3ab39f384d342) if(WIN32) set(protobuf_STATIC_LIBRARIES -- GitLab From 81f3f47a55aa70b12dccdaffafe7bf1f53f42818 Mon Sep 17 00:00:00 2001 From: jackyko Date: Tue, 30 Jan 2018 18:48:58 +0800 Subject: [PATCH 0029/1078] grpc build according to protobuf version --- tensorflow/contrib/cmake/external/grpc.cmake | 70 ++++++++++--------- .../contrib/cmake/external/protobuf.cmake | 11 ++- .../ci_build/windows/cpu/cmake/run_build.bat | 3 +- .../ci_build/windows/gpu/cmake/run_build.bat | 3 +- 4 files changed, 50 insertions(+), 37 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 4a2989a945..0de0433619 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -17,41 +17,43 @@ include (ExternalProject) # perl find_package(Perl REQUIRED) -# nasm -if(NOT CMAKE_ASM_NASM_COMPILER) - if (WIN32) - find_program(CMAKE_ASM_NASM_COMPILER - NAMES nasm - PATHS "$ENV{ProgramFiles}/NASM" - DOC "path to NASM compiler") - else() - find_program(CMAKE_ASM_NASM_COMPILER - NAMES nasm - PATHS "/usr/bin" - DOC "path to NASM compiler") 
- endif() -endif() - -if(NOT CMAKE_ASM_NASM_COMPILER) - message(FATAL_ERROR "NASM not found!") -endif() +if(${PROTOBUF_VERSION} STREQUAL "3.5.0") + # nasm + if(NOT CMAKE_ASM_NASM_COMPILER) + if (WIN32) + find_program(CMAKE_ASM_NASM_COMPILER + NAMES nasm + PATHS "$ENV{ProgramFiles}/NASM" + DOC "path to NASM compiler") + else() + find_program(CMAKE_ASM_NASM_COMPILER + NAMES nasm + PATHS "/usr/bin" + DOC "path to NASM compiler") + endif() + endif() -# go -if(NOT GO_EXECUTABLE) - if (WIN32) - find_program(GO_EXECUTABLE - NAMES go - PATHS "$ENV{ProgramFiles}/Go/bin" - DOC "path to go compiler") - else() - find_program(GO_EXECUTABLE - NAMES go - PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" - DOC "path to go compiler") - endif() -endif() -if(NOT GO_EXECUTABLE) - message(FATAL_ERROR "Go not found!") + if(NOT CMAKE_ASM_NASM_COMPILER) + message(FATAL_ERROR "NASM not found!") + endif() + + # go + if(NOT GO_EXECUTABLE) + if (WIN32) + find_program(GO_EXECUTABLE + NAMES go + PATHS "$ENV{ProgramFiles}/Go/bin" + DOC "path to go compiler") + else() + find_program(GO_EXECUTABLE + NAMES go + PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" + DOC "path to go compiler") + endif() + endif() + if(NOT GO_EXECUTABLE) + message(FATAL_ERROR "Go not found!") + endif() endif() set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index d6fdb37406..785ed4c739 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,16 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 2761122b810fe8861004ae785cc3ab39f384d342) + +# enable choose protobuf versions +SET(PROTOBUF_VERSION "3.5.0" CACHE STRING "Protobuf version") +SET_PROPERTY(CACHE PROTOBUF_VERSION PROPERTY 
STRINGS "3.4.0" "3.5.0") + +if(${PROTOBUF_VERSION} STREQUAL "3.5.0") + set(PROTOBUF_TAG 2761122b810fe8861004ae785cc3ab39f384d342) +else() + set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) +endif() if(WIN32) set(protobuf_STATIC_LIBRARIES diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat index 957729bb37..11eb9f9771 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat @@ -34,9 +34,10 @@ IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORC SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" +SET PROTOBUF_VERSION="3.4.0" :: Run cmake to create Visual Studio Project files. -%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -DPROTOBUF_VERSION=%PROTOBUF_VERSION% :: Run msbuild in the resulting VS project files to build a pip package. 
%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index 5a362de399..df70cae55d 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -35,9 +35,10 @@ IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORC SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" +SET PROTOBUF_VERSION="3.4.0" :: Run cmake to create Visual Studio Project files. -%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -DPROTOBUF_VERSION=%PROTOBUF_VERSION% :: Run msbuild in the resulting VS project files to build a pip package. 
%MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 946e0772432acc29663a5a4bf5738d0f429b6622 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Tue, 30 Jan 2018 23:00:36 +0800 Subject: [PATCH 0030/1078] ssl change back to none --- tensorflow/contrib/cmake/external/grpc.cmake | 79 ++++++++++---------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 0de0433619..cd866f74c9 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -14,47 +14,45 @@ # ============================================================================== include (ExternalProject) -# perl -find_package(Perl REQUIRED) +# # perl +# find_package(Perl REQUIRED) -if(${PROTOBUF_VERSION} STREQUAL "3.5.0") - # nasm - if(NOT CMAKE_ASM_NASM_COMPILER) - if (WIN32) - find_program(CMAKE_ASM_NASM_COMPILER - NAMES nasm - PATHS "$ENV{ProgramFiles}/NASM" - DOC "path to NASM compiler") - else() - find_program(CMAKE_ASM_NASM_COMPILER - NAMES nasm - PATHS "/usr/bin" - DOC "path to NASM compiler") - endif() - endif() +# # nasm +# if(NOT CMAKE_ASM_NASM_COMPILER) +# if (WIN32) +# find_program(CMAKE_ASM_NASM_COMPILER +# NAMES nasm +# PATHS "$ENV{ProgramFiles}/NASM" +# DOC "path to NASM compiler") +# else() +# find_program(CMAKE_ASM_NASM_COMPILER +# NAMES nasm +# PATHS "/usr/bin" +# DOC "path to NASM compiler") +# endif() +# endif() - if(NOT CMAKE_ASM_NASM_COMPILER) - message(FATAL_ERROR "NASM not found!") - endif() +# if(NOT CMAKE_ASM_NASM_COMPILER) +# message(FATAL_ERROR "NASM not found!") +# endif() - # go - if(NOT GO_EXECUTABLE) - if (WIN32) - find_program(GO_EXECUTABLE - NAMES go - PATHS "$ENV{ProgramFiles}/Go/bin" - DOC "path to go compiler") - else() - find_program(GO_EXECUTABLE - NAMES go - PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" - DOC "path to go compiler") - endif() - endif() - if(NOT 
GO_EXECUTABLE) - message(FATAL_ERROR "Go not found!") - endif() -endif() +# # go +# if(NOT GO_EXECUTABLE) +# if (WIN32) +# find_program(GO_EXECUTABLE +# NAMES go +# PATHS "$ENV{ProgramFiles}/Go/bin" +# DOC "path to go compiler") +# else() +# find_program(GO_EXECUTABLE +# NAMES go +# PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" +# DOC "path to go compiler") +# endif() +# endif() +# if(NOT GO_EXECUTABLE) +# message(FATAL_ERROR "Go not found!") +# endif() set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) @@ -93,8 +91,9 @@ ExternalProject_Add(grpc -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS} -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} - -DCMAKE_ASM_NASM_COMPILER:STRING=${CMAKE_ASM_NASM_COMPILER} - -DGO_EXECUTABLE:STRING=${GO_EXECUTABLE} + -DgRPC_SSL_PROVIDER:STRING=NONE + # -DCMAKE_ASM_NASM_COMPILER:STRING=${CMAKE_ASM_NASM_COMPILER} + # -DGO_EXECUTABLE:STRING=${GO_EXECUTABLE} ) # grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. -- GitLab From 2cd3a0edc2b9119f3434105ace28411731d403bc Mon Sep 17 00:00:00 2001 From: JackyKo Date: Tue, 30 Jan 2018 23:10:18 +0800 Subject: [PATCH 0031/1078] readme update --- tensorflow/contrib/cmake/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index ba5f4e58c5..da72ea320f 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -35,11 +35,11 @@ bindings. 
* [SWIG](http://www.swig.org/download.html) -* [Perl](https://www.perl.org/get.html) +* [Perl](https://www.perl.org/get.html) (optional, for full grpc build) -* [Go](https://golang.org/) +* [Go](https://golang.org/) (optional, for full grpc build) -* [NASM](http://www.nasm.us/) +* [NASM](http://www.nasm.us/)/[yasm](http://yasm.tortall.net/) (optional, for full grpc build) * Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 (latest version of MSVC 2017 is not suppored by CUDA yet, try it on your own) @@ -117,9 +117,9 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * CUDNN (GPU build) * NCCL (GPU build on Linux) * SWIG (python binding) - * Perl (GPU build on Windows, required by grpc) - * Go (GPU build on Windows, required by grpc) - * NASM (GPU build on Windows, required by grpc) + * Perl (GPU build on Windows, required by full function grpc, optional) + * Go (GPU build on Windows, required by full function grpc, optional) + * NASM (GPU build on Windows, required by full function grpc, optional) 1. Start CMake GUI 2. Click on `Browse Source` and direct to the the folder `/tensorflow/contrib/cmake` 3. Click on `Browse Build` and spectify a location that you want tensorflow to be build @@ -131,7 +131,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. If SWIG is not in system path, you need set it manually. (optional) * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) - * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. 
For Linux build, this option must always be `on`. In Windows this need to be `on` for gpu build. Reminded that Perl, Go and NASM is required for this option in windows. + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. This need to be `on` for a gpu build. Reminded that Perl, Go and NASM is required for this option if you want to build grpc with offical SSL support. * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` -- GitLab From f3335e8b1e69d641162f4873c0f532778946f147 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Tue, 30 Jan 2018 23:15:40 +0800 Subject: [PATCH 0032/1078] Update run_build.bat --- tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat index 11eb9f9771..957729bb37 100644 --- a/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/cpu/cmake/run_build.bat @@ -34,10 +34,9 @@ IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORC SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" -SET PROTOBUF_VERSION="3.4.0" :: Run cmake to create Visual Studio Project files. 
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -DPROTOBUF_VERSION=%PROTOBUF_VERSION% +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From 3d5ffc90cd3c1bb750046b3b0bc3c85e4f317e33 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Tue, 30 Jan 2018 23:16:38 +0800 Subject: [PATCH 0033/1078] Update run_build.bat --- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index df70cae55d..5a362de399 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -35,10 +35,9 @@ IF DEFINED DISABLE_FORCEINLINE (ECHO DISABLE_FORCEINLINE is set to %DISABLE_FORC SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" -SET PROTOBUF_VERSION="3.4.0" :: Run cmake to create Visual Studio Project files. 
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -DPROTOBUF_VERSION=%PROTOBUF_VERSION% +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From d6b17ce09bddfec1670935bffb06eecddb7c3ba4 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Tue, 30 Jan 2018 23:25:36 +0800 Subject: [PATCH 0034/1078] README update --- tensorflow/contrib/cmake/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index da72ea320f..ac7b7bcb04 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -39,7 +39,7 @@ bindings. 
* [Go](https://golang.org/) (optional, for full grpc build) -* [NASM](http://www.nasm.us/)/[yasm](http://yasm.tortall.net/) (optional, for full grpc build) +* [NASM](http://www.nasm.us/)/[YASM](http://yasm.tortall.net/) (optional, for full grpc build) * Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 (latest version of MSVC 2017 is not suppored by CUDA yet, try it on your own) @@ -117,9 +117,9 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * CUDNN (GPU build) * NCCL (GPU build on Linux) * SWIG (python binding) - * Perl (GPU build on Windows, required by full function grpc, optional) - * Go (GPU build on Windows, required by full function grpc, optional) - * NASM (GPU build on Windows, required by full function grpc, optional) + * Perl (required by grpc for ssl support, optional) + * Go (required by grpc for ssl support, optional) + * NASM/YASM (required by grpc for ssl support, optional) 1. Start CMake GUI 2. Click on `Browse Source` and direct to the the folder `/tensorflow/contrib/cmake` 3. Click on `Browse Build` and spectify a location that you want tensorflow to be build @@ -131,7 +131,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_BUILD_PYTHON_BINDING` is default to be `on`. Set to `off` if you don't need python interaface. If SWIG is not in system path, you need set it manually. (optional) * `tensorflow_BUILD_SHARED_LIB` is default to be `off`. Set to `on` if you want the c++ interface. (optional) * `tensorflow_ENABLE_GPU` is default to be `off`. Set to `on` if you want GPU support. It will search CUDA and CUDNN dependecies if you have set them to system path, otherwise CMake would prompt error and request you to set it manually. (optional) - * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. This need to be `on` for a gpu build. 
Reminded that Perl, Go and NASM is required for this option if you want to build grpc with offical SSL support. + * `tensorflow_ENABLE_GRPC_SUPPORT` is default to be `on`. For Linux build, this option must always be `on`. This need to be `on` for a gpu build. Reminded that Perl, Go and NASM/YASM are required for this option if you want to build grpc with offical SSL support. * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` @@ -152,7 +152,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. ***The wheel name varies depends on you config. Change to your own wheel filename.*** - Remind that some pip installation requires administrator right command prompt. + Reminded that some pip installation requires administrator right command prompt. ##### C++ -- GitLab From 6d84ada6eed5ad6e362ef4bcbf768ebf095753b2 Mon Sep 17 00:00:00 2001 From: JackyKo Date: Tue, 30 Jan 2018 23:26:45 +0800 Subject: [PATCH 0035/1078] README update --- tensorflow/contrib/cmake/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index ac7b7bcb04..c2faafa9f2 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -135,7 +135,7 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * `tensorflow_ENABLE_POSITION_INDEPENDENT_CODE` should always be `on` * `tensorflow_ENABLE_SNAPPY_SUPPORT` should always be `on` * `tensorflow_OPTIMIZE_FOR_NATIVE_ARCH` should always be `on` - * `CMAKE_INSTALL_PREFIX` is the location where the final package will be installed. You may change it to you own preferred path (optional) + * `CMAKE_INSTALL_PREFIX` is the location where the final package will be installed. You may change it to your own preferred path (optional) 6. 
After changing the configuration in step 5, press `Configure` again 7. If not error is found, press `Generate` -- GitLab From 8b377c00cfb40f5c364b8c7f96262ae37e9bb19c Mon Sep 17 00:00:00 2001 From: JackyKo Date: Tue, 30 Jan 2018 23:36:28 +0800 Subject: [PATCH 0036/1078] typo fix --- tensorflow/contrib/cmake/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index c2faafa9f2..e935abb5b2 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -42,7 +42,7 @@ bindings. * [NASM](http://www.nasm.us/)/[YASM](http://yasm.tortall.net/) (optional, for full grpc build) * Additional pre-requisites for Microsoft Windows: - - Visual Studio 2015 (latest version of MSVC 2017 is not suppored by CUDA yet, try it on your own) + - Visual Studio 2015 (latest version of MSVC 2017 is not supported by CUDA yet, try it on your own risk) - Python 3.5 - NumPy 1.11.0 or later -- GitLab From a74e6b6b13e79937b546f8cc0a5d172d5c787f98 Mon Sep 17 00:00:00 2001 From: jackyko Date: Wed, 31 Jan 2018 14:39:49 +0800 Subject: [PATCH 0037/1078] remove perl, go, asm requirement --- tensorflow/contrib/cmake/external/grpc.cmake | 40 -------------------- 1 file changed, 40 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index cd866f74c9..6ac087892a 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -14,46 +14,6 @@ # ============================================================================== include (ExternalProject) -# # perl -# find_package(Perl REQUIRED) - -# # nasm -# if(NOT CMAKE_ASM_NASM_COMPILER) -# if (WIN32) -# find_program(CMAKE_ASM_NASM_COMPILER -# NAMES nasm -# PATHS "$ENV{ProgramFiles}/NASM" -# DOC "path to NASM compiler") -# else() -# find_program(CMAKE_ASM_NASM_COMPILER -# NAMES nasm -# PATHS "/usr/bin" -# DOC "path to NASM 
compiler") -# endif() -# endif() - -# if(NOT CMAKE_ASM_NASM_COMPILER) -# message(FATAL_ERROR "NASM not found!") -# endif() - -# # go -# if(NOT GO_EXECUTABLE) -# if (WIN32) -# find_program(GO_EXECUTABLE -# NAMES go -# PATHS "$ENV{ProgramFiles}/Go/bin" -# DOC "path to go compiler") -# else() -# find_program(GO_EXECUTABLE -# NAMES go -# PATHS "/usr/bin" "/usr/local/bin" "/usr/local/go" -# DOC "path to go compiler") -# endif() -# endif() -# if(NOT GO_EXECUTABLE) -# message(FATAL_ERROR "Go not found!") -# endif() - set(GRPC_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/include) set(GRPC_URL https://github.com/grpc/grpc.git) set(GRPC_BUILD ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc) -- GitLab From b83f1b17b72d0a2313b025e4c0e94561dd54d950 Mon Sep 17 00:00:00 2001 From: jackyko Date: Wed, 31 Jan 2018 14:57:41 +0800 Subject: [PATCH 0038/1078] nccl header update --- tensorflow/contrib/cmake/CMakeLists.txt | 5 +++++ tensorflow/contrib/nccl/kernels/nccl_manager.h | 5 +++++ tensorflow/contrib/nccl/kernels/nccl_ops.cc | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 3ec630b3d6..c03c53e305 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -63,6 +63,11 @@ if (NOT WIN32) # option's default value is OFF. Fill it with real default values set(tensorflow_CUDNN_INCLUDE /usr/include) endif (NOT tensorflow_CUDNN_INCLUDE) + option(tensorflow_NCCL_INCLUDE "nccl.h headher install path" /usr/include/) + if (NOT tensorflow_NCCL_INCLUDE) + # option's default value is OFF. Fill it with real default values + set(tensorflow_NCCL_INCLUDE /usr/include) + endif (NOT tensorflow_NCCL_INCLUDE) option(tensorflow_PATH_CUDNN_STATIC_LIB "Override PATH_STATIC_LIB for libcudnn_static.a" ${tensorflow_PATH_STATIC_LIB}) if (NOT tensorflow_PATH_CUDNN_STATIC_LIB) # option's default value is OFF. 
Fill it with real default values diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h index bb219e0edc..7726f5813a 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.h +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h @@ -20,7 +20,12 @@ limitations under the License. #include #include +#ifdef (PLATFORM_GOOGLE) #include "src/nccl.h" +#else +#include "nccl.h" +#endif + #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/platform/mutex.h" diff --git a/tensorflow/contrib/nccl/kernels/nccl_ops.cc b/tensorflow/contrib/nccl/kernels/nccl_ops.cc index 266d4f6f0d..7e59e70bbc 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_ops.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_ops.cc @@ -17,7 +17,11 @@ limitations under the License. #include +#ifdef (PLATFORM_GOOGLE) #include "src/nccl.h" +#else +#include "nccl.h" +#endif #include "tensorflow/contrib/nccl/kernels/nccl_manager.h" #include "tensorflow/core/framework/op_kernel.h" -- GitLab From ffa7c0a459153d9d7c8f1a390fb708dca8519ed4 Mon Sep 17 00:00:00 2001 From: jackyko Date: Wed, 31 Jan 2018 15:45:38 +0800 Subject: [PATCH 0039/1078] macro define fix --- tensorflow/contrib/nccl/kernels/nccl_manager.h | 2 +- tensorflow/contrib/nccl/kernels/nccl_ops.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h index 7726f5813a..0fe0476a2d 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.h +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h @@ -20,7 +20,7 @@ limitations under the License. 
#include #include -#ifdef (PLATFORM_GOOGLE) +#ifdef PLATFORM_GOOGLE #include "src/nccl.h" #else #include "nccl.h" diff --git a/tensorflow/contrib/nccl/kernels/nccl_ops.cc b/tensorflow/contrib/nccl/kernels/nccl_ops.cc index 7e59e70bbc..03d4dc50a4 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_ops.cc +++ b/tensorflow/contrib/nccl/kernels/nccl_ops.cc @@ -17,7 +17,7 @@ limitations under the License. #include -#ifdef (PLATFORM_GOOGLE) +#ifdef PLATFORM_GOOGLE #include "src/nccl.h" #else #include "nccl.h" -- GitLab From 9800b2e903d7ae486008509987b736e777cb1ead Mon Sep 17 00:00:00 2001 From: jackyko Date: Thu, 1 Feb 2018 16:25:16 +0800 Subject: [PATCH 0040/1078] fix for windows api --- tensorflow/core/platform/default/logging.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index 40c260f236..d456f631f2 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -187,6 +187,11 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { return comb.NewString(); } +#ifdef COMPILER_MSVC +#undef max; +#undef min; +#endif + // Helper functions for CHECK_OP macro. // The (int, int) specialization works around the issue that the compiler // will not instantiate the template version of the function on values of -- GitLab From 5cb510ebc01efc9c680df34fd6175ea7d5bd307b Mon Sep 17 00:00:00 2001 From: JackyKo Date: Sun, 4 Feb 2018 20:19:24 +0800 Subject: [PATCH 0041/1078] readme update --- tensorflow/contrib/cmake/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index e935abb5b2..eb915bd9cb 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -35,11 +35,11 @@ bindings. 
* [SWIG](http://www.swig.org/download.html) -* [Perl](https://www.perl.org/get.html) (optional, for full grpc build) +* [Perl](https://www.perl.org/get.html) (optional, for SSL support build) -* [Go](https://golang.org/) (optional, for full grpc build) +* [Go](https://golang.org/) (optional, for SSL support build) -* [NASM](http://www.nasm.us/)/[YASM](http://yasm.tortall.net/) (optional, for full grpc build) +* [NASM](http://www.nasm.us/)/[YASM](http://yasm.tortall.net/) (optional, for SSL support build) * Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 (latest version of MSVC 2017 is not supported by CUDA yet, try it on your own risk) @@ -117,8 +117,8 @@ Install from CMake GUI would be a convenient way to generate C++ build projects. * CUDNN (GPU build) * NCCL (GPU build on Linux) * SWIG (python binding) - * Perl (required by grpc for ssl support, optional) - * Go (required by grpc for ssl support, optional) + * Perl (required if you need ssl support, optional) + * Go (required if you need ssl support, optional) * NASM/YASM (required by grpc for ssl support, optional) 1. Start CMake GUI 2. Click on `Browse Source` and direct to the the folder `/tensorflow/contrib/cmake` -- GitLab From 21a9efc4cddbce661073544db31a63639686310a Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 28 Nov 2017 05:28:49 -0800 Subject: [PATCH 0042/1078] Add complex dtypes support for `tf.squared_difference` This fix tries to address the issue raised in 14932 where complex dtypes are not supported for `tf.squared_difference`, which is different from the doc string in `math_ops.cc` (see `BINARY_FEWER`). This fix adds the complex64 and complex128 support in kernel, and adds additional test cases. This fix fixes 14932. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_op_squared_difference.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/cwise_op_squared_difference.cc b/tensorflow/core/kernels/cwise_op_squared_difference.cc index 78fefc69c7..d0ff271df6 100644 --- a/tensorflow/core/kernels/cwise_op_squared_difference.cc +++ b/tensorflow/core/kernels/cwise_op_squared_difference.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "SquaredDifference", functor::squared_difference, - float, Eigen::half, double, int32, int64); +REGISTER7(BinaryOp, CPU, "SquaredDifference", functor::squared_difference, + float, Eigen::half, double, int32, int64, complex64, complex128); #if GOOGLE_CUDA REGISTER4(BinaryOp, GPU, "SquaredDifference", functor::squared_difference, float, Eigen::half, double, int64); -- GitLab From 4f5e66aca388ee13e925d173a82644eed9d5a760 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 28 Nov 2017 05:32:08 -0800 Subject: [PATCH 0043/1078] Add test cases for complex dtypes support with `tf.squared_difference` Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index d314124ccd..7078ac99c8 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -203,7 +203,9 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testSquaredDifference(self): - for dtype in [np.int32, np.float16]: + for dtype in [np.float16, np.float32, np.float64, + np.int32, np.int64, + np.complex64, np.complex128]: x = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) y = np.array([-3, -2, -1], dtype=dtype) z = (x - y) * (x - y) -- GitLab From f7bb3741549e791f687fa8289fb281717eae7426 Mon Sep 17 
00:00:00 2001 From: Yong Tang Date: Sat, 24 Feb 2018 19:05:52 +0000 Subject: [PATCH 0044/1078] Add additional test to cover squared difference for complex where imag parts are not 0. Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 7078ac99c8..3224e40db2 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -213,6 +213,16 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf) + @test_util.run_in_graph_and_eager_modes() + def testComplexSquaredDifference(self): + for dtype in [np.complex64, np.complex128]: + x = np.array([[1+3j, 2+2j, 3+1j], [4-1j, 5-2j, 6-3j]], dtype=dtype) + y = np.array([-3+1j, -2+2j, -1+3j], dtype=dtype) + z = (x - y) * (x - y) + with test_util.device(use_gpu=True): + z_tf = self.evaluate(math_ops.squared_difference(x, y)) + self.assertAllClose(z, z_tf) + @test_util.with_c_api class ApproximateEqualTest(test_util.TensorFlowTestCase): -- GitLab From ab5cdb187d96e3a865724c3d41671dd253288456 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 24 Feb 2018 21:22:26 +0000 Subject: [PATCH 0045/1078] Enable squared_difference complex on CPU only Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops_test.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 3224e40db2..533a00e737 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -204,8 +204,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testSquaredDifference(self): for dtype in [np.float16, np.float32, np.float64, - np.int32, np.int64, - np.complex64, np.complex128]: + np.int32, 
np.int64]: x = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype) y = np.array([-3, -2, -1], dtype=dtype) z = (x - y) * (x - y) @@ -219,7 +218,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): x = np.array([[1+3j, 2+2j, 3+1j], [4-1j, 5-2j, 6-3j]], dtype=dtype) y = np.array([-3+1j, -2+2j, -1+3j], dtype=dtype) z = (x - y) * (x - y) - with test_util.device(use_gpu=True): + with test_util.device(use_gpu=False): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf) -- GitLab From d7f53eee873cf675eccd7a3f3d5966b8fa398887 Mon Sep 17 00:00:00 2001 From: Ka Long Date: Fri, 13 Apr 2018 10:29:47 +0800 Subject: [PATCH 0046/1078] Update CMakeLists.txt --- tensorflow/contrib/cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 251f3d2e0c..75a7664f1d 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5) # Project -project(tensorflow VERSION 1.5.0 LANGUAGES C CXX) +project(tensorflow VERSION 1.7.0 LANGUAGES C CXX) # Set C++14 as standard for the whole project set(CMAKE_CXX_STANDARD 14) -- GitLab From 346ec501531e6af0bad0da52d2db4a81733fac27 Mon Sep 17 00:00:00 2001 From: Jacky Date: Mon, 23 Apr 2018 15:02:21 +0800 Subject: [PATCH 0047/1078] turn off png_test --- tensorflow/contrib/cmake/external/png.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/cmake/external/png.cmake b/tensorflow/contrib/cmake/external/png.cmake index ad2af01bc0..d2aa99efdf 100644 --- a/tensorflow/contrib/cmake/external/png.cmake +++ b/tensorflow/contrib/cmake/external/png.cmake @@ -58,6 +58,7 @@ ExternalProject_Add(png -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_INSTALL_PREFIX:STRING=${png_INSTALL} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} + -DPNG_TESTS:BOOL=OFF ) ## put png includes in the directory where they are expected -- GitLab 
From 522eebf29e2c7fdb46b9bb5af5900cc864b19fba Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 24 Apr 2018 14:41:23 +0800 Subject: [PATCH 0048/1078] new gpu framework modules added --- tensorflow/contrib/cmake/tf_core_framework.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 8cb5bc4f1b..ece815d205 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -139,16 +139,19 @@ set(tf_proto_text_srcs "tensorflow/core/example/example.proto" "tensorflow/core/example/feature.proto" "tensorflow/core/framework/allocation_description.proto" + "tensorflow/core/framework/api_def.proto" "tensorflow/core/framework/attr_value.proto" "tensorflow/core/framework/cost_graph.proto" "tensorflow/core/framework/device_attributes.proto" "tensorflow/core/framework/function.proto" "tensorflow/core/framework/graph.proto" "tensorflow/core/framework/graph_transfer_info.proto" + "tensorflow/core/framework/iterator.proto" "tensorflow/core/framework/kernel_def.proto" "tensorflow/core/framework/log_memory.proto" "tensorflow/core/framework/node_def.proto" "tensorflow/core/framework/op_def.proto" + "tensorflow/core/framework/reader_base.proto" "tensorflow/core/framework/remote_fused_graph_execute_info.proto" "tensorflow/core/framework/resource_handle.proto" "tensorflow/core/framework/step_stats.proto" @@ -158,6 +161,7 @@ set(tf_proto_text_srcs "tensorflow/core/framework/tensor_shape.proto" "tensorflow/core/framework/tensor_slice.proto" "tensorflow/core/framework/types.proto" + "tensorflow/core/framework/variable.proto" "tensorflow/core/framework/versions.proto" "tensorflow/core/lib/core/error_codes.proto" "tensorflow/core/protobuf/cluster.proto" -- GitLab From 973b80039920d25d93dc38aff4ab45b98b6b4872 Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 24 Apr 2018 15:47:26 +0800 Subject: [PATCH 0049/1078] core_ops updated --- 
tensorflow/contrib/cmake/tf_core_ops.cmake | 8 +++++--- tensorflow/contrib/cmake/tf_python.cmake | 21 ++++++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/cmake/tf_core_ops.cmake b/tensorflow/contrib/cmake/tf_core_ops.cmake index c356fb1494..42c621700b 100644 --- a/tensorflow/contrib/cmake/tf_core_ops.cmake +++ b/tensorflow/contrib/cmake/tf_core_ops.cmake @@ -13,13 +13,14 @@ # limitations under the License. # ============================================================================== set(tf_op_lib_names - "audio_ops" "array_ops" + "audio_ops" "batch_ops" "bitwise_ops" "boosted_trees_ops" "candidate_sampling_ops" "checkpoint_ops" + "collective_ops" "control_flow_ops" "ctc_ops" "cudnn_rnn_ops" @@ -33,8 +34,8 @@ set(tf_op_lib_names "io_ops" "linalg_ops" "list_ops" - "lookup_ops" "logging_ops" + "lookup_ops" "manip_ops" "math_ops" "nn_ops" @@ -44,10 +45,11 @@ set(tf_op_lib_names "remote_fused_graph_ops" "resource_variable_ops" "rpc_ops" + "scoped_allocator_ops" "script_ops" "sdca_ops" - "set_ops" "sendrecv_ops" + "set_ops" "sparse_ops" "spectral_ops" "state_ops" diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 46d36d227f..1a630a2bd4 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -315,15 +315,14 @@ function(GENERATE_PYTHON_OP_LIB tf_python_op_lib_name) ${GENERATE_PYTHON_OP_LIB_DESTINATION} PARENT_SCOPE) endfunction() -GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("array_ops") +GENERATE_PYTHON_OP_LIB("audio_ops") GENERATE_PYTHON_OP_LIB("batch_ops") GENERATE_PYTHON_OP_LIB("bitwise_ops") GENERATE_PYTHON_OP_LIB("boosted_trees_ops") -GENERATE_PYTHON_OP_LIB("math_ops") -GENERATE_PYTHON_OP_LIB("functional_ops") GENERATE_PYTHON_OP_LIB("candidate_sampling_ops") GENERATE_PYTHON_OP_LIB("checkpoint_ops") +GENERATE_PYTHON_OP_LIB("collective_ops") GENERATE_PYTHON_OP_LIB("control_flow_ops" 
ADDITIONAL_LIBRARIES $) GENERATE_PYTHON_OP_LIB("ctc_ops") @@ -334,14 +333,18 @@ GENERATE_PYTHON_OP_LIB("decode_proto_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_decode_proto_op.py) GENERATE_PYTHON_OP_LIB("encode_proto_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/proto/python/ops/gen_encode_proto_op.py) +GENERATE_PYTHON_OP_LIB("function_ops") +GENERATE_PYTHON_OP_LIB("functional_ops") GENERATE_PYTHON_OP_LIB("image_ops") GENERATE_PYTHON_OP_LIB("io_ops") GENERATE_PYTHON_OP_LIB("linalg_ops") GENERATE_PYTHON_OP_LIB("list_ops") GENERATE_PYTHON_OP_LIB("logging_ops") GENERATE_PYTHON_OP_LIB("lookup_ops") -GENERATE_PYTHON_OP_LIB("nn_ops") GENERATE_PYTHON_OP_LIB("manip_ops") +GENERATE_PYTHON_OP_LIB("math_ops") +GENERATE_PYTHON_OP_LIB("nn_ops") +GENERATE_PYTHON_OP_LIB("no_op") GENERATE_PYTHON_OP_LIB("parsing_ops") GENERATE_PYTHON_OP_LIB("random_ops") GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops" @@ -349,17 +352,21 @@ GENERATE_PYTHON_OP_LIB("remote_fused_graph_ops" GENERATE_PYTHON_OP_LIB("resource_variable_ops") GENERATE_PYTHON_OP_LIB("rpc_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/rpc/python/ops/gen_rpc_op.py) +GENERATE_PYTHON_OP_LIB("scoped_allocator_ops") GENERATE_PYTHON_OP_LIB("script_ops") GENERATE_PYTHON_OP_LIB("sdca_ops") +GENERATE_PYTHON_OP_LIB("sendrecv_ops") GENERATE_PYTHON_OP_LIB("set_ops") -GENERATE_PYTHON_OP_LIB("state_ops") GENERATE_PYTHON_OP_LIB("sparse_ops") GENERATE_PYTHON_OP_LIB("spectral_ops") +GENERATE_PYTHON_OP_LIB("state_ops") +GENERATE_PYTHON_OP_LIB("stateless_random_ops") GENERATE_PYTHON_OP_LIB("string_ops") GENERATE_PYTHON_OP_LIB("summary_ops") GENERATE_PYTHON_OP_LIB("user_ops") GENERATE_PYTHON_OP_LIB("training_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/training/gen_training_ops.py) +GENERATE_PYTHON_OP_LIB("word2vec_ops") GENERATE_PYTHON_OP_LIB("contrib_boosted_trees_model_ops" DESTINATION 
${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/boosted_trees/python/ops/gen_model_ops.py) @@ -422,8 +429,8 @@ GENERATE_PYTHON_OP_LIB("contrib_text_skip_gram_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/text/python/ops/gen_skip_gram_ops.py) GENERATE_PYTHON_OP_LIB("contrib_bigquery_reader_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cloud/python/ops/gen_bigquery_reader_ops.py) -GENERATE_PYTHON_OP_LIB("stateless_random_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/stateless/gen_stateless_random_ops.py) +# GENERATE_PYTHON_OP_LIB("stateless_random_ops" +# DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/stateless/gen_stateless_random_ops.py) GENERATE_PYTHON_OP_LIB("debug_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/debug/ops/gen_debug_ops.py) -- GitLab From e28cbfe68e11f97c89e0472b7fecd187485d0916 Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 24 Apr 2018 19:23:20 +0800 Subject: [PATCH 0050/1078] version number update --- tensorflow/contrib/cmake/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 8c6d8f2e72..5396e434b1 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5) # Project -project(tensorflow VERSION 1.7.0 LANGUAGES C CXX) +project(tensorflow VERSION 1.8.0 LANGUAGES C CXX) # Set C++14 as standard for the whole project set(CMAKE_CXX_STANDARD 14) @@ -69,7 +69,7 @@ if (NOT WIN32) # option's default value is OFF. 
Fill it with real default values set(tensorflow_CUDNN_INCLUDE /usr/include) endif (NOT tensorflow_CUDNN_INCLUDE) - option(tensorflow_NCCL_INCLUDE "nccl.h headher install path" /usr/include/) + option(tensorflow_NCCL_INCLUDE "nccl.h header install path" /usr/include/) if (NOT tensorflow_NCCL_INCLUDE) # option's default value is OFF. Fill it with real default values set(tensorflow_NCCL_INCLUDE /usr/include) -- GitLab From 664f3414aee08d0ec71e13c7329a6712c15de2da Mon Sep 17 00:00:00 2001 From: Jacky Date: Tue, 24 Apr 2018 19:24:18 +0800 Subject: [PATCH 0051/1078] typo correct --- tensorflow/contrib/cmake/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index be3dfdf043..f5d17dbafc 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -4,7 +4,7 @@ TensorFlow CMake build This directory contains CMake files for building TensorFlow on Microsoft Windows and Linux. [CMake](https://cmake.org) is a cross-platform tool that can generate build scripts for multiple build systems, including Microsoft -Visual Studio and GCC. The method suppose could work on MacOS, but not tested yet. +Visual Studio and GCC. "The method has not been tested on Mac OS X. **N.B.** We provide Linux build instructions primarily for the purpose of testing the build. We recommend using the standard Bazel-based build on @@ -14,12 +14,12 @@ Current Status -------------- CMake can be used to build TensorFlow on all platforms. See the [getting started documentation](https://www.tensorflow.org/install/install_windows) -for instructions on how to install a pre-built TensorFlow package on Windows and Linux. Procedure in MacOS is similar to the Linux build. +for instructions on how to install a pre-built TensorFlow package on Windows and Linux. The procedure in MacOS is similar to the Linux build. 
### Current known limitations * It is not possible to load a custom Op library. * GCS file system is not supported. -* Debug build is not available since python no longer release debug library. +* Debug build is not available since Python for Windows is no longer distributed with a debug library. ## Building with CMake -- GitLab From 149714f6845692f588329c54d20f93f10b1641c6 Mon Sep 17 00:00:00 2001 From: Jacky Date: Wed, 25 Apr 2018 17:34:33 +0800 Subject: [PATCH 0052/1078] refine under code review --- tensorflow/contrib/cmake/external/grpc.cmake | 3 +-- tensorflow/contrib/cmake/tf_python.cmake | 2 -- tensorflow/core/util/cuda_device_functions.h | 16 +++++++++------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 56774bf8aa..2258a81170 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -28,6 +28,7 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libaddress_sorting.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/zlib/libz.a) @@ -53,8 +54,6 @@ ExternalProject_Add(grpc -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} -DZLIB_ROOT:STRING=${ZLIB_INSTALL} -DgRPC_SSL_PROVIDER:STRING=NONE - # -DCMAKE_ASM_NASM_COMPILER:STRING=${CMAKE_ASM_NASM_COMPILER} - # -DGO_EXECUTABLE:STRING=${GO_EXECUTABLE} ) # grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. 
diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 1a630a2bd4..bf5c54d087 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -429,8 +429,6 @@ GENERATE_PYTHON_OP_LIB("contrib_text_skip_gram_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/text/python/ops/gen_skip_gram_ops.py) GENERATE_PYTHON_OP_LIB("contrib_bigquery_reader_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/cloud/python/ops/gen_bigquery_reader_ops.py) -# GENERATE_PYTHON_OP_LIB("stateless_random_ops" -# DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/stateless/gen_stateless_random_ops.py) GENERATE_PYTHON_OP_LIB("debug_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/python/debug/ops/gen_debug_ops.py) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index a8d67aa501..846a323a6d 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -28,20 +28,22 @@ limitations under the License. 
#include #include -#ifdef PLATFORM_GOOGLE -#include "cuda/include/cuda.h" -#include "cuda/include/device_functions.h" -#else +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#ifdef _WIN32 #include "cuda.h" #include "device_functions.h" +#else +#include "cuda/include/cuda.h" +#include "cuda/include/device_functions.h" #endif #include "tensorflow/core/platform/types.h" #if CUDA_VERSION >= 7050 -#ifdef PLATFORM_GOOGLE -#include "cuda/include/cuda_fp16.h" -#else +#ifdef _WIN32 #include "cuda_fp16.h" +#else +#include "cuda/include/cuda_fp16.h" #endif #endif // CUDA_VERSION >= 7050 -- GitLab From 7f21edc8239c1f90ce438e1a14dc5560e01d4f65 Mon Sep 17 00:00:00 2001 From: Jacky Date: Wed, 25 Apr 2018 17:38:49 +0800 Subject: [PATCH 0053/1078] add suppress regeneration back --- tensorflow/contrib/cmake/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 5396e434b1..d7aab58b65 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -166,8 +166,13 @@ if(WIN32) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /D_ITERATOR_DEBUG_LEVEL=0") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /D_ITERATOR_DEBUG_LEVEL=0") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /D_ITERATOR_DEBUG_LEVEL=0") + + # Try to avoid flaky failures due to failed generation of generate.stamp files. 
+ set(CMAKE_SUPPRESS_REGENERATION ON) endif() + + if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -std=c++11") endif() -- GitLab From a210139765d916baa516dbaf571a435d588cb2e4 Mon Sep 17 00:00:00 2001 From: Jacky Date: Wed, 25 Apr 2018 18:01:47 +0800 Subject: [PATCH 0054/1078] update accroding to review --- tensorflow/contrib/cmake/CMakeLists.txt | 1 - tensorflow/core/platform/default/logging.h | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index d7aab58b65..c9e2002977 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -172,7 +172,6 @@ if(WIN32) endif() - if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions -std=c++11") endif() diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index 916d3caa7e..6305cb2823 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -187,9 +187,10 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { return comb.NewString(); } -#ifdef COMPILER_MSVC -#undef max; -#undef min; +// On Windows builds, max and min may already be defined as macros elsewhere; undefine them. +#ifdef _WIN32 +#undef max +#undef min #endif // Helper functions for CHECK_OP macro.
-- GitLab From 446707346fbbee5cf228aab0c25a2cf08de9e7dc Mon Sep 17 00:00:00 2001 From: Jacky Date: Thu, 26 Apr 2018 16:43:21 +0800 Subject: [PATCH 0055/1078] header fix --- tensorflow/contrib/nccl/kernels/nccl_manager.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/nccl/kernels/nccl_manager.h b/tensorflow/contrib/nccl/kernels/nccl_manager.h index 90cdea47c2..e81a529227 100644 --- a/tensorflow/contrib/nccl/kernels/nccl_manager.h +++ b/tensorflow/contrib/nccl/kernels/nccl_manager.h @@ -23,8 +23,8 @@ limitations under the License. #ifdef WIN32 #include "nccl.h" #else -"third_party/nccl/nccl.h" -#else +#include "third_party/nccl/nccl.h" +#endif #include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h" #include "tensorflow/core/framework/tensor.h" -- GitLab From 961b9086d6c435d078913fbc7293a91ee819bd38 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sat, 28 Apr 2018 11:07:45 +0800 Subject: [PATCH 0056/1078] modified for latest eager runtime --- tensorflow/c/python_api.cc | 2 +- tensorflow/contrib/cmake/CMakeLists.txt | 3 +- tensorflow/contrib/cmake/tf_c.cmake | 27 ++++----- tensorflow/contrib/cmake/tf_core_cpu.cmake | 4 +- .../contrib/cmake/tf_core_eager_runtime.cmake | 57 +++++++++++++++++++ tensorflow/contrib/cmake/tf_python.cmake | 2 + tensorflow/contrib/cmake/tf_shared_lib.cmake | 4 ++ 7 files changed, 80 insertions(+), 19 deletions(-) create mode 100644 tensorflow/contrib/cmake/tf_core_eager_runtime.cmake diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index e18fdf6c57..cb9038978d 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -16,7 +16,7 @@ limitations under the License. 
#include "tensorflow/c/python_api.h" #include "tensorflow/c/c_api_internal.h" -#include "tensorflow/python/framework/cpp_shape_inference.pb.h" +#include "tensorflow/python/framework/cpp_shape_inference.h" namespace tensorflow { diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 981f50bc95..3016a7e2bd 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -539,6 +539,7 @@ include(tf_cc_ops.cmake) include(tf_c.cmake) include(tf_grappler.cmake) include(tf_core_profiler.cmake) +include(tf_core_eager_runtime.cmake) if(tensorflow_BUILD_CC_EXAMPLE) include(tf_tutorials.cmake) include(tf_label_image_example.cmake) @@ -552,4 +553,4 @@ if(tensorflow_BUILD_SHARED_LIB) endif() if(tensorflow_BUILD_CC_TESTS OR tensorflow_BUILD_PYTHON_TESTS) include(tf_tests.cmake) -endif() +endif() \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index c6a15f2ca0..0479a90713 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -19,11 +19,6 @@ set(tf_c_srcs "${tensorflow_source_dir}/tensorflow/c/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/c_api.h" "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc" - "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc" - "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h" - "${tensorflow_source_dir}/tensorflow/c/eager/tape.h" - "${tensorflow_source_dir}/tensorflow/c/eager/runtime.cc" - "${tensorflow_source_dir}/tensorflow/c/eager/runtime.h" "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.cc" "${tensorflow_source_dir}/tensorflow/c/checkpoint_reader.h" "${tensorflow_source_dir}/tensorflow/c/tf_status_helper.cc" @@ -38,13 +33,15 @@ add_dependencies( tf_core_lib tf_protos_cc) -add_library(tf_c_python_api OBJECT - "${tensorflow_source_dir}/tensorflow/c/python_api.cc" - "${tensorflow_source_dir}/tensorflow/c/python_api.h" -) -add_dependencies( - 
tf_c_python_api - tf_c - tf_core_lib - tf_core_framework - tf_protos_cc) +if(tensorflow_BUILD_PYTHON_BINDINGS) + add_library(tf_c_python_api OBJECT + "${tensorflow_source_dir}/tensorflow/c/python_api.cc" + "${tensorflow_source_dir}/tensorflow/c/python_api.h" + ) + add_dependencies( + tf_c_python_api + tf_c + tf_core_lib + tf_core_framework + tf_protos_cc) +endif() diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 1562b6e0a3..d8884d464f 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -20,8 +20,6 @@ file(GLOB_RECURSE tf_core_cpu_srcs "${tensorflow_source_dir}/tensorflow/cc/saved_model/*.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/*.h" "${tensorflow_source_dir}/tensorflow/core/common_runtime/*.cc" - "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.h" - "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.cc" "${tensorflow_source_dir}/tensorflow/core/debug/*.h" "${tensorflow_source_dir}/tensorflow/core/debug/*.cc" "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/server_lib.h" @@ -41,6 +39,8 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/*test*.h" "${tensorflow_source_dir}/tensorflow/core/*test*.cc" "${tensorflow_source_dir}/tensorflow/core/*main.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.h" "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/*.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu_device_factory.cc" "${tensorflow_source_dir}/tensorflow/core/common_runtime/direct_session.cc" diff --git a/tensorflow/contrib/cmake/tf_core_eager_runtime.cmake b/tensorflow/contrib/cmake/tf_core_eager_runtime.cmake new file mode 100644 index 0000000000..78e4c0d303 --- /dev/null +++ b/tensorflow/contrib/cmake/tf_core_eager_runtime.cmake @@ -0,0 
+1,57 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +######################################################## +# tf_core_eager_runtime library +######################################################## +file(GLOB_RECURSE tf_core_eager_runtime_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*.h" +) + +file(GLOB_RECURSE tf_core_eager_runtime_exclude_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*test*.h" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/eager/*test*.cc" +) + +list(REMOVE_ITEM tf_core_eager_runtime_srcs ${tf_core_eager_runtime_exclude_srcs}) + +add_library(tf_core_eager_runtime OBJECT ${tf_core_eager_runtime_srcs}) +add_dependencies( + tf_core_eager_runtime + tf_c + tf_core_lib) + + +file(GLOB_RECURSE tf_c_eager_srcs + "${tensorflow_source_dir}/tensorflow/c/eager/*.cc" + "${tensorflow_source_dir}/tensorflow/c/eager/*.h" +) + +file(GLOB_RECURSE tf_c_eager_exlclude_srcs + "${tensorflow_source_dir}/tensorflow/c/eager/*test*.h" + "${tensorflow_source_dir}/tensorflow/c/eager/*test*.cc" +) + +list(REMOVE_ITEM tf_c_eager_srcs ${tf_c_eager_exlclude_srcs}) + +add_library(tf_c_eager OBJECT ${tf_c_eager_srcs}) +add_dependencies( + tf_c_eager + tf_core_eager_runtime + tf_c + tf_cc_framework + 
tf_cc_while_loop + tf_core_lib + tf_protos_cc) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index bf5c54d087..fd0b3bd38c 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -524,6 +524,7 @@ if(WIN32) $ $ $ + $ $ $ $ @@ -581,6 +582,7 @@ add_library(pywrap_tensorflow_internal SHARED $ $ $ + $ $ $ $ diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index 41f8391b6f..a6f4ac2f18 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -23,6 +23,8 @@ if(WIN32) # we need. # add_library(tensorflow_static STATIC + $ + $ $ $ $ @@ -65,6 +67,8 @@ endif(WIN32) # tensorflow is a shared library containing all of the # TensorFlow runtime and the standard ops and kernels. add_library(tensorflow SHARED + $ + $ $ $ $ -- GitLab From a1a036742ef614e28064b0839c8b17d1cb9025a0 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sat, 28 Apr 2018 11:15:35 +0800 Subject: [PATCH 0057/1078] update pywrap dependecy with eager --- tensorflow/contrib/cmake/tf_python.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index fd0b3bd38c..02f56e9bad 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -519,6 +519,7 @@ if(WIN32) add_library(pywrap_tensorflow_internal_static STATIC ${pywrap_tensorflow_internal_src} $ + $ $ $ $ @@ -577,6 +578,7 @@ endif(WIN32) add_library(pywrap_tensorflow_internal SHARED ${pywrap_tensorflow_internal_src} $ + $ $ $ $ -- GitLab From 3165f6efe06d45caadbfb568ec6c0e4a6e9b789b Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sat, 28 Apr 2018 12:14:37 +0800 Subject: [PATCH 0058/1078] add numpy dependency --- tensorflow/c/python_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index cb9038978d..e18fdf6c57 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -16,7 +16,7 @@ limitations under the License. #include "tensorflow/c/python_api.h" #include "tensorflow/c/c_api_internal.h" -#include "tensorflow/python/framework/cpp_shape_inference.h" +#include "tensorflow/python/framework/cpp_shape_inference.pb.h" namespace tensorflow { -- GitLab From 7dc284ec17ba76258e14ac4e7f322863aaaa6546 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Sat, 28 Apr 2018 12:16:08 +0800 Subject: [PATCH 0059/1078] add numpy dependency --- tensorflow/contrib/cmake/tf_c.cmake | 34 +++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 0479a90713..9415a6ceb2 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -12,6 +12,34 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== + +# 1. Resolve the installed version of Python (for Python.h and python). +# TODO(mrry): Parameterize the build script to enable Python 3 building. +if(NOT PYTHON_INCLUDE_DIR) + set(PYTHON_NOT_FOUND false) + exec_program("${PYTHON_EXECUTABLE}" + ARGS "-c \"import distutils.sysconfig; print(distutils.sysconfig.get_python_inc())\"" + OUTPUT_VARIABLE PYTHON_INCLUDE_DIR + RETURN_VALUE PYTHON_NOT_FOUND) + if(${PYTHON_NOT_FOUND}) + message(FATAL_ERROR + "Cannot get Python include directory. Is distutils installed?") + endif(${PYTHON_NOT_FOUND}) +endif(NOT PYTHON_INCLUDE_DIR) + +# 2. Resolve the installed version of NumPy (for numpy/arrayobject.h). 
+if(NOT NUMPY_INCLUDE_DIR) + set(NUMPY_NOT_FOUND false) + exec_program("${PYTHON_EXECUTABLE}" + ARGS "-c \"import numpy; print(numpy.get_include())\"" + OUTPUT_VARIABLE NUMPY_INCLUDE_DIR + RETURN_VALUE NUMPY_NOT_FOUND) + if(${NUMPY_NOT_FOUND}) + message(FATAL_ERROR + "Cannot get NumPy include directory: Is NumPy installed?") + endif(${NUMPY_NOT_FOUND}) +endif(NOT NUMPY_INCLUDE_DIR) + ######################################################## # tf_c_framework library ######################################################## @@ -38,6 +66,12 @@ if(tensorflow_BUILD_PYTHON_BINDINGS) "${tensorflow_source_dir}/tensorflow/c/python_api.cc" "${tensorflow_source_dir}/tensorflow/c/python_api.h" ) + + target_include_directories(tf_c_python_api PUBLIC + ${PYTHON_INCLUDE_DIR} + ${NUMPY_INCLUDE_DIR} + ) + add_dependencies( tf_c_python_api tf_c -- GitLab From 7950d197767ef24a1525b809a310b82020f665ba Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Wed, 1 Aug 2018 13:43:35 -0700 Subject: [PATCH 0060/1078] MKL DNN: Adding support of fusing Pad and Conv2D in MKL DNN optimized code --- tensorflow/core/graph/mkl_layout_pass.cc | 298 +++++++++++++++++- tensorflow/core/graph/mkl_layout_pass_test.cc | 70 ++++ tensorflow/core/kernels/BUILD | 28 ++ tensorflow/core/kernels/mkl_conv_ops.cc | 96 +++++- tensorflow/core/kernels/mkl_conv_ops.h | 28 +- tensorflow/core/ops/nn_ops.cc | 49 +++ 6 files changed, 554 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index c22e0a3872..d0abe5da35 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2432,6 +2432,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.mkl_conv2d_with_bias = "_MklConv2DWithBias"; csinfo_.mkl_conv2d_grad_filter_with_bias = "_MklConv2DBackpropFilterWithBias"; + csinfo_.mkl_pad_with_conv2d = "_MklPadWithConv2D"; + csinfo_.pad = "Pad"; + csinfo_.pad_with_conv2d = "__MklDummyPadWithConv2D"; 
csinfo_.relu = "Relu"; csinfo_.relu_grad = "ReluGrad"; csinfo_.tanh = "Tanh"; @@ -2508,6 +2511,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.mul, mkl_op_registry::GetMklOpName(csinfo_.mul), CopyAttrsDataType, AlwaysRewrite}); + rinfo_.push_back({csinfo_.pad_with_conv2d, csinfo_.mkl_pad_with_conv2d, + CopyAttrsPadWithConv2D, AlwaysRewrite}); rinfo_.push_back({csinfo_.relu, mkl_op_registry::GetMklOpName(csinfo_.relu), CopyAttrsDataType, AlwaysRewrite}); rinfo_.push_back({csinfo_.relu_grad, @@ -2546,6 +2551,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad, csinfo_.conv2d_grad_filter_with_bias, GetConv2DBackpropFilterOrBiasAddGrad}); + minfo_.push_back({csinfo_.pad, csinfo_.conv2d, + csinfo_.pad_with_conv2d, GetPadOrConv2D}); + //TODO : Need to check if pad is with zero or not + // if is zero then replace, if not then do not replace } // Standard interface to run pass @@ -2628,7 +2637,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string mkl_conv2d_grad_filter; string mkl_conv2d_grad_filter_with_bias; string mkl_conv2d_with_bias; + string mkl_pad_with_conv2d; string mul; + string pad; + string pad_with_conv2d; string relu; string relu_grad; string tanh; @@ -2734,6 +2746,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Helper function to merge different nodes Status MergeConv2DWithBiasAdd(std::unique_ptr* g, Node* m, Node* n); + Status MergePadWithConv2D(std::unique_ptr* g, Node* m, Node* n); Status MergeConv2DBackpropFilterWithBiasAddGrad(std::unique_ptr* g, Node* m, Node* n); @@ -2771,6 +2784,59 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return n; } + // Find Pad or Conv2D node that can be merged with input node 'm'. + // If input 'm' is Pad, then check if there exists Conv2D node that can be + // merged with 'm'. 
If input 'm' is Conv2D, then check if there exists Pad + // node that can be merged with 'm'. + static Node* GetPadOrConv2D(const Node* m) { + CHECK_NOTNULL(m); + Node* n = nullptr; + + if (m->type_string() == csinfo_.pad) { + // If m is Pad, then Conv2D is the output of Pad. + for (const Edge* e : m->out_edges()) { + if (!e->IsControlEdge() && + e->dst()->type_string() == csinfo_.conv2d) { + n = e->dst(); + break; + } + } + } else { + CHECK_EQ(m->type_string(), csinfo_.conv2d); + // If m is Conv2D, go over all input edges + // and search for a Pad node. + for (const Edge* e : m->in_edges()) { + if (!e->IsControlEdge() && + e->src()->type_string() == csinfo_.pad) { + n = e->src(); + break; + } + } + } + // Check if only VALID type of padding is used + // or not. + if (n != nullptr) { + const Node* conv_node; + if (m->type_string() == csinfo_.conv2d) + conv_node = m; + else + conv_node = n; + string padding; + TF_CHECK_OK(GetNodeAttr(conv_node->def(), "padding", &padding)); + if (padding != "VALID") + // Then do not merge. + // Only VALID type of padding in conv op can be + // merged with Pad op. + n = nullptr; + } + if (n == nullptr) { + VLOG(1) << "MklLayoutRewritePass: Could not find matching " + << "Pad and Conv2D node for merging. Input node: " + << m->DebugString(); + } + + return n; + } // Find Conv2DBackpropFilter or BiasAddGrad node that can be merged with input // node 'm'. If input 'm' is Conv2DBackpropFilter, then check if there exists // BiasAddGrad node that can be merged with 'm'.
If input 'm' is BiasAddGrad, @@ -3090,6 +3156,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); + static void CopyAttrsPadWithConv2D(const Node* orig_node, NodeBuilder* nb); + static void CopyAttrsFromPadAndConv2D(const Node* orig_node1, const Node* orig_node2, + NodeBuilder* nb); static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb); @@ -3289,6 +3358,8 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias. for (const Edge* e : filter_node->out_edges()) { if ((e->dst()->type_string() == csinfo_.mkl_conv2d || + // add check for mkl_pad_with_conv2d + e->dst()->type_string() == csinfo_.mkl_pad_with_conv2d || e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) && e->dst_input() == kConv2DFilterInputSlotIdx /* filter is 2nd input of Conv2D and _MklConv2D. */) { @@ -3598,6 +3669,65 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); } +//used in rinfo when replacing __MklDummyPadWithConv2D by _MklPadWithConv2D +void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, + NodeBuilder* nb) { + DataType Tpaddings; + DataType T; + string data_format; + string padding; + std::vector strides; + bool use_cudnn_on_gpu; + + // Get all attributes from old node 1. 
+ TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); + TF_CHECK_OK( + GetNodeAttr(orig_node->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "Tpaddings", &Tpaddings)); + + // Add attributes to new node. + nb->Attr("T", T); + nb->Attr("strides", strides); + nb->Attr("padding", padding); + nb->Attr("data_format", data_format); + nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); + nb->Attr("Tpaddings", Tpaddings); +} + +//used with MergePadWithConv2D +void MklLayoutRewritePass::CopyAttrsFromPadAndConv2D(const Node* orig_node1, + const Node* orig_node2, NodeBuilder* nb) { + DataType Tpaddings; + DataType T; + string data_format; + string padding; + std::vector strides; + bool use_cudnn_on_gpu; + + // Get all attributes from old node 1. + TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "T", &T)); + TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "padding", &padding)); + TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "data_format", &data_format)); + TF_CHECK_OK( + GetNodeAttr(orig_node1->def(), "use_cudnn_on_gpu", &use_cudnn_on_gpu)); + // Get all attributes from old node 2. + TF_CHECK_OK(GetNodeAttr(orig_node2->def(), "Tpaddings", &Tpaddings)); + + // Add attributes to new node. + nb->Attr("T", T); + nb->Attr("strides", strides); + nb->Attr("padding", padding); + nb->Attr("data_format", data_format); + nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); + + + // Add attributes to new node. + nb->Attr("Tpaddings", Tpaddings); +} void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb) { DataType T; @@ -3824,7 +3954,7 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, // If 'm' is BiasAdd, then 'n' is Conv2D. 
Since Conv2D feeds BiasAdd, // BiasAdd is successor node, and Conv2D predecessor node. Node* pred = m->type_string() == csinfo_.bias_add ? n : m; - Node* succ = m->type_string() == csinfo_.bias_add ? m : n; + Node* succ = m->type_string() == csinfo_.bias_add ? m : n; // 1. Get all attributes from input nodes. DataType T_pred, T_succ; @@ -3963,6 +4093,161 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, return Status::OK(); } +Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, + Node* m, Node* n) { + CHECK_EQ(((m->type_string() == csinfo_.pad && + n->type_string() == csinfo_.conv2d)) || + ((n->type_string() == csinfo_.pad && + m->type_string() == csinfo_.conv2d)), + true); + + // Conv2D is successor node, and Pad predecessor node. + Node* pred = m->type_string() == csinfo_.pad ? m : n; + Node* succ = m->type_string() == csinfo_.pad ? n : m; + + // 1. Get all attributes from input nodes. + DataType T_pred, T_succ; + string padding; + std::vector strides; + std::vector dilations; + string data_format_pred, data_format_succ; + bool use_cudnn_on_gnu; + TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "padding", &padding)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "dilations", &dilations)); + // data format for pad is not available and not necessary, thus + // we dont need to match data format + // TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); + TF_CHECK_OK(GetNodeAttr(succ->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); + // We check to ensure that data formats of both succ and pred are same. + // We expect them to be same, so we can enforce this as assert. + // But assert can be too strict, so we enforce this as a check. 
+ // If the check fails, then we do not merge two nodes. + // We also do same check for devices. + // if (data_format_pred != data_format_succ || T_pred != T_succ || + if (T_pred != T_succ || + pred->assigned_device_name() != succ->assigned_device_name() || + pred->def().device() != succ->def().device()) { + return Status(error::Code::INVALID_ARGUMENT, + "data_format or T attribute or devices of Conv2D and " + "Pad do not match. Will skip node merge optimization"); + } + + const int succ_num = succ->num_inputs(); + gtl::InlinedVector succ_control_edges; + gtl::InlinedVector, 4> succ_in(succ_num); + FillInputs(succ, &succ_control_edges, &succ_in); + + const int pred_num = pred->num_inputs(); + gtl::InlinedVector pred_control_edges; + gtl::InlinedVector, 4> pred_in(pred_num); + FillInputs(pred, &pred_control_edges, &pred_in); + + // We need to ensure that Pad only feeds to Conv2D (some other operator is + // not expecting output of Pad). If this is not the case, then we cannot + // merge Conv2D with Pad. + const int kFirstOutputSlot = 0; + for (const Edge* e : pred->out_edges()) { + if (e->src_output() == kFirstOutputSlot && e->dst() != succ) { + return Status(error::Code::INVALID_ARGUMENT, + "Pad does not feed to Conv2D, or " + "it feeds Conv2D but has multiple outputs. " + "Will skip node merge optimization"); + } + } + + // 2. Get inputs from both the nodes. + // Find the 2 inputs from the Pad and the Filter input from the Conv2D. + // Get operands 0 and 1 of Pad. + CHECK_EQ(pred->in_edges().size(), 2); // Pad must have 2 inputs. + // Get operand 1 (filter) of Conv2D. + // Conv2D must have 2 inputs: pad output and Filter + CHECK_EQ(succ->in_edges().size(), 2); + + // We will use the node name of Conv2D as the name of new node + // Build new node. We use same name as original node, but change the op + // name.
+ NodeBuilder nb(succ->name(), csinfo_.pad_with_conv2d); + nb.Input(pred_in[0].first, pred_in[0].second); // In1 (input data) of Pad + // pred_in[1] will be 2nd Tensorflow tensor for Conv2D. + nb.Input(succ_in[1].first, succ_in[1].second); // In2 (filter) of conv2d + // In1 of Conv2D is same as output of Pad. + // Thus, only need to add In2 of Conv2D + nb.Input(pred_in[1].first, pred_in[1].second); // In2 (paddings) of Pad + + // Copy attributes from Pad and conv2D to PadWithConv2D. + CopyAttrsFromPadAndConv2D(const_cast(succ), const_cast(pred), + &nb); + + // Copy the device assigned to old node to new node. + nb.Device(succ->def().device()); + + // Create node. + Node* new_node; + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); + CHECK_NOTNULL(new_node); + + // Incoming data edges from 'pred' node and 'succ' node to new 'new_node' + // node are already copied in BuildNode. + // We handle control edges now. + for (const Edge* e : pred->in_edges()) { + if (e->IsControlEdge()) { + // Allow duplicate while adding control edge as it would fail (return + // NULL) if we try to add duplicate edge. + CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true)); + } + } + for (const Edge* e : succ->in_edges()) { + if (e->IsControlEdge()) { + // Allow duplicate while adding control edge as it would fail (return + // NULL) if we try to add duplicate edge. + CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true)); + } + } + + // Incoming edges are fixed, we will fix the outgoing edges now. + // First, we will fix outgoing control edges from 'pred' node. + for (const Edge* e : pred->out_edges()) { + if (e->IsControlEdge()) { + // Allow duplicate while adding control edge as it would fail (return + // NULL) if we try to add duplicate edge. + CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true)); + } + } + + // Second, we will fix outgoing control and data edges from 'succ' node. 
+ for (const Edge* e : succ->out_edges()) { + if (e->IsControlEdge()) { + // Allow duplicate while adding control edge as it would fail (return + // NULL) if we try to add duplicate edge. + CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true)); + } else { + // Conv2D has only 1 output (at slot 0) and merged node also has only 1 + // output (at slot 0). + const int kPadWithConv2DOutputSlot = 0; + CHECK_NOTNULL((*g)->AddEdge(new_node, kPadWithConv2DOutputSlot, e->dst(), + e->dst_input())); + } + } + + // Copy device assigned to old node to new node. + // It's ok to use pred or succ as we have enforced a check that + // both have same device assigned. + new_node->set_assigned_device_name(pred->assigned_device_name()); + + VLOG(1) << "MklLayoutRewritePass: Merged old node:" << pred->DebugString() + << ", and node: " << succ->DebugString() + << ", into node:" << new_node->DebugString(); + + (*g)->RemoveNode(succ); + (*g)->RemoveNode(pred); + + return Status::OK(); +} + Status MklLayoutRewritePass::MergeConv2DBackpropFilterWithBiasAddGrad( std::unique_ptr* g, Node* m, Node* n) { CHECK_EQ(((m->type_string() == csinfo_.bias_add_grad && @@ -4096,6 +4381,12 @@ Status MklLayoutRewritePass::MergeNode(std::unique_ptr* g, Node* m, m->type_string() == csinfo_.conv2d))) { return this->MergeConv2DWithBiasAdd(g, m, n); } + if (((m->type_string() == csinfo_.pad && + n->type_string() == csinfo_.conv2d)) || + ((n->type_string() == csinfo_.pad && + m->type_string() == csinfo_.conv2d))) { + return this->MergePadWithConv2D(g, m, n); + } if (((m->type_string() == csinfo_.bias_add_grad && n->type_string() == csinfo_.conv2d_grad_filter)) || @@ -4207,9 +4498,10 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { } // We make an exception for __MklDummyConv2DWithBias and - // __MklConv2DBackpropFilterWithBias since their names do not match Mkl node - // names. 
+ // __MklConv2DBackpropFilterWithBias, __MklDummyPadWithConv2D since their names + // do not match Mkl node names. if (n->type_string() != csinfo_.conv2d_with_bias && + n->type_string() != csinfo_.pad_with_conv2d && n->type_string() != csinfo_.conv2d_grad_filter_with_bias && !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(n->type_string()), T)) { diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index a41f5861af..020e3c9168 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -2012,6 +2012,76 @@ TEST_F(MklLayoutPassTest, Basic) { "A->C;A->D;B->C:1;B->D:1"); } +// Test set 0: Pad + Conv2D; padding is VALID +// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// After layout pass +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + 
"A(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + "A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;Y->Z:1"); +} + +// Test set 0: Pad + Conv2D; padding is SAME +// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// After layout pass - No merging +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Negative) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'SAME' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);C(Pad);D(Input);DMT/_0(Const);DMT/_1(Const);" + "E(_MklConv2D);Y(Input);Z(Zeta)|A->C;B->C:1;C->E;" + "C:control->DMT/_0:control;C:control->DMT/_1:control;" + "D->E:1;DMT/_0->E:2;DMT/_1->E:3;E->Z;Y->Z:1"); +} + // Test set 1: Conv2D + AddBias // C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 6126e8b7ba..f14542068d 100644 --- a/tensorflow/core/kernels/BUILD +++ 
b/tensorflow/core/kernels/BUILD @@ -39,6 +39,7 @@ load( "cc_header_only_library", "if_not_windows", "if_override_eigen_strong_inline", + "tf_cc_test_mkl", ) load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl") load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") @@ -1129,6 +1130,7 @@ tf_cuda_cc_test( ], ) + tf_cc_test( name = "decode_wav_op_test", size = "small", @@ -6124,6 +6126,7 @@ tf_mkl_kernel_library( ] + if_mkl(["@mkl_dnn"]), ) + tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", @@ -6269,6 +6272,31 @@ tf_mkl_kernel_library( ], ) +tf_cc_test_mkl( + name = "mkl_fused_ops_test", + size = "small", + srcs = ["mkl_fused_ops_test.cc"], + linkstatic = 1, + deps = [ + ":mkl_conv_op", + ":mkl_tfconv_op", + ":conv_ops", + ":image", + ":ops_testutil", + ":ops_util", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ] +) + # NOTE(lespeholt): This rule is deprecated, please use: # tensorflow/core/util/batch_util.h cc_library( diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 62396eeb8b..d4ec831cf2 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -753,9 +753,31 @@ class MklConv2DOp : public OpKernel { TensorFormat data_format_; }; + +#define REGISTER_MKL_CPU(T) \ + REGISTER_KERNEL_BUILDER(Name("_MklConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConv2DOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklConv2DWithBias") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConv2DOp); \ + REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias") \ + .Device(DEVICE_CPU) \ + 
.TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklDummyOp); + +TF_CALL_float(REGISTER_MKL_CPU); +#undef REGISTER_MKL_CPU + #else -template +template class MklConv2DOp : public OpKernel { public: ~MklConv2DOp() {} @@ -814,6 +836,11 @@ class MklConv2DOp : public OpKernel { dilations, strides; memory::dims dst_dims_tf_order, dst_dims_mkl_order; + // If pad with conv2d fusion is enabled + if (padEnabled) { + PadWithConvFusion(context, padding_left, padding_right); + } + // Get shapes of input tensors in MKL-DNN order MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, dilations_); @@ -822,7 +849,7 @@ class MklConv2DOp : public OpKernel { conv_utl.GetConvFwdSizesInMklOrder( src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order, - &padding_left, &padding_right); + &padding_left, &padding_right, padEnabled); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -869,7 +896,6 @@ class MklConv2DOp : public OpKernel { // MKLDNN dilation starts from 0. 
dilations[kDilationH] -= 1; dilations[kDilationW] -= 1; - // get a conv2d fwd from primitive pool MklConv2DFwdPrimitive* conv2d_fwd = nullptr; if (biasEnabled) { @@ -937,13 +963,53 @@ class MklConv2DOp : public OpKernel { errors::Aborted("Operation received an exception:", error_msg)); } } + + void PadWithConvFusion(OpKernelContext* context, memory::dims &padding_left, + memory::dims &padding_right){ + const Tensor& paddings_tf = MklGetInput(context, 2); + OP_REQUIRES(context, paddings_tf.dims() == 2, + errors::InvalidArgument("paddings must be 2-dimensional: ", + paddings_tf.shape().DebugString())); + Tpadding* paddings = nullptr; + // To get individual pad, need to flatten the tensor + paddings = static_cast(const_cast + (paddings_tf.flat().data())); + // For NHWC format: + // paddings[0], paddings[1], paddings[6], paddings[7] should be zero + // if the paddings_tf is [ [0, 0] [1,2] [3,4] [0,0] ] + // paddings = {0, 0, 1, 2, 3, 4, 0, 0} ; flat method is row major + // then, values are: top = 1, bottom =2, left=3, right=4 + // For NCHW format, + // paddings[0], paddings[1], paddings[2], paddings[3] should be zero + // similar explanation as NHWC format will apply. + string data_format = ToString(data_format_); + if(data_format == "NHWC"){ + pad_top = paddings[2]; + pad_bottom = paddings[3]; + pad_left = paddings[4]; + pad_right = paddings[5]; + } + else if (data_format == "NCHW"){ + pad_top = paddings[4]; + pad_bottom = paddings[5]; + pad_left = paddings[6]; + pad_right = paddings[7]; + } + // Create padding arrays for MKL DNN convolutions. + // MKL-DNN uses asymetric padding. 
+ padding_left = {static_cast(pad_top), static_cast(pad_left)}; + padding_right = {static_cast(pad_bottom), static_cast(pad_right)}; + } private: std::vector strides_; std::vector dilations_; + int64 pad_top, pad_left; + int64 pad_bottom, pad_right; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; + const int kInputIndex_Pad = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; const int kDilationH = 0, kDilationW = 1; engine cpu_engine = engine(engine::cpu, 0); @@ -1036,26 +1102,44 @@ class MklConv2DOp : public OpKernel { } }; -#endif #define REGISTER_MKL_CPU(T) \ REGISTER_KERNEL_BUILDER(Name("_MklConv2D") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .Label(mkl_op_registry::kMklOpLabel), \ - MklConv2DOp); \ + MklConv2DOp); \ REGISTER_KERNEL_BUILDER(Name("_MklConv2DWithBias") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .Label(mkl_op_registry::kMklOpLabel), \ - MklConv2DOp); \ + MklConv2DOp); \ REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .Label(mkl_op_registry::kMklOpLabel), \ + MklDummyOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConv2DOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConv2DOp); \ + REGISTER_KERNEL_BUILDER(Name("__MklDummyPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ MklDummyOp); TF_CALL_float(REGISTER_MKL_CPU); +#endif } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 3f154ff33b..c6487a4512 100644 --- 
a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -232,7 +232,7 @@ class MklDnnConvUtil { const memory::dims& strides, const memory::dims& dilations, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, - memory::dims* pad_r) { + memory::dims* pad_r, bool padEnabled=false) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -268,7 +268,19 @@ class MklDnnConvUtil { GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, dilation_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); - + // If padEnabled, i.e., pad and conv op are fused, then + // all pads are already passed from pad op through + // *pad_l and *pad_r + if(padEnabled) { + pad_top = static_cast((*pad_l)[0]); + pad_left = static_cast((*pad_l)[1]); + pad_bottom = static_cast((*pad_r)[0]); + pad_right = static_cast((*pad_r)[1]); + // update the out_rows and out_cols based on all + // sides of the pads coming from pad op. + out_rows = out_rows + (pad_top + pad_bottom ) / stride_rows; + out_cols = out_cols + (pad_left + pad_right ) / stride_cols; + } // Tensorflow output is in data_format order. (NHWC or NCHW) TensorShape out_shape = ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); @@ -283,8 +295,12 @@ class MklDnnConvUtil { *output_dims_mkl_order = mkldnn_sizes; // Now handle padding. MKL-DNN uses asymetric padding. - *pad_l = {static_cast(pad_top), static_cast(pad_left)}; - *pad_r = {static_cast(pad_bottom), static_cast(pad_right)}; + // But, if padEnabled, i.e., pad and conv op are fused, + // then, *pad_l and *pad_r are already set from pad op + if(!padEnabled) { + *pad_l = {static_cast(pad_top), static_cast(pad_left)}; + *pad_r = {static_cast(pad_bottom), static_cast(pad_right)}; + } } // Calculate output and pad size of forward Convolution operator. 
@@ -325,7 +341,7 @@ class MklDnnConvUtil { memory::dims* strides, memory::dims *dilations, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, - memory::dims* pad_r) { + memory::dims* pad_r, bool padEnabled=false) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); @@ -344,7 +360,7 @@ class MklDnnConvUtil { GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, *dilations, output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + pad_l, pad_r, padEnabled); if (!context_->status().ok()) return; } }; diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index f947d4c30d..8bb22a8372 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1573,6 +1573,55 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. )doc"); +REGISTER_OP("__MklDummyPadWithConv2D") + .Input("input: T") + .Input("filter: T") + .Input("paddings: Tpaddings") + .Output("output: T") + .Attr("T: {half, float, double}") + .Attr("strides: list(int)") + .Attr("use_cudnn_on_gpu: bool = true") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") + .Attr("Tpaddings: {int32, int64} = DT_INT32") + .SetShapeFn(shape_inference::Conv2DShape) + .Doc(R"doc( +Dummy node that enables fusing Pad and Conv2D operator for MKL. This node +does not perform anything. It is just created as an intermediate output of +merging Pad and Conv2D. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. 
+)doc"); + +REGISTER_OP("_MklPadWithConv2D") + .Input("input: T") + .Input("filter: T") + .Input("paddings: Tpaddings") + .Input("mkl_input: uint8") + .Input("mkl_filter: uint8") + .Input("mkl_paddings: uint8") + .Output("output: T") + .Output("filter_output: T") + .Output("mkl_output: uint8") + .Output("mkl_filter_output: uint8") + .Attr("T: {half, float, double}") + .Attr("strides: list(int)") + .Attr("use_cudnn_on_gpu: bool = true") + .Attr(GetPaddingAttrString()) + .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") + .Attr("Tpaddings: {int32, int64} = DT_INT32") + .SetShapeFn(shape_inference::Conv2DShape) + .Doc(R"doc( +MKL version of Pad and Conv2D operator. Uses MKL DNN APIs to perform +Pad and 2D convolution to the output of convolution. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + REGISTER_OP("_MklConv2DBackpropFilter") .Input("input: T") .Input("filter_sizes: int32") -- GitLab From dd63093a599081accfe2a2d2ca8c029d413a15d7 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Thu, 2 Aug 2018 08:43:06 -0700 Subject: [PATCH 0061/1078] adding unit test for pad+conv2d fusion op --- tensorflow/core/kernels/mkl_fused_ops_test.cc | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 tensorflow/core/kernels/mkl_fused_ops_test.cc diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc new file mode 100644 index 0000000000..216e8d0206 --- /dev/null +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -0,0 +1,164 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifdef INTEL_MKL +#ifndef INTEL_MKL_ML // We don't support fusion in MKL ML +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/image_ops.h" +#include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/conv_ops_gpu.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/public/session.h" + +namespace tensorflow { + +// Helper class for converting MKL tesnors to TF tensor and comparing to +// expected values + +const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0}; +const TensorShape dummy_shape({8}); + +class ConvMklToTF : public OpsTestBase { + public: + template + void ConvertAndCompare(DataType dtype, const Tensor& first, + const Tensor& second, const Tensor& expected) { + // Create an MKL to TF conversion node and execute it + TF_EXPECT_OK(NodeDefBuilder("mkl_to_tf_op", "_MklToTf") + .Input(FakeInput(dtype)) // Input + .Input(FakeInput(DT_UINT8)) // Mkl second tensor + .Attr("T", dtype) + .Attr("_kernel", "MklOp") + 
.Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + AddInputFromArray(first.shape(), first.flat()); + AddInputFromArray(second.shape(), second.flat()); + TF_ASSERT_OK(RunOpKernel()); + + const Tensor& output = *GetOutput(0); + test::ExpectTensorNear(expected, output, 1e-5); + } + void TestBody(){}; +}; + +// Testing fusion of pad and convolution + +class FusedPadConvOpTest : public OpsTestBase { + public: + template + void Run(DataType dtype, Tensor& image, Tensor& filter, Tensor& padding, + Tensor& expected, const string data_format) { + const int stride = 1; + + // Create a fused pad+conv2d node + TF_EXPECT_OK(NodeDefBuilder("fused_pad_conv_op", "_MklPadWithConv2D") + .Input(FakeInput(dtype)) // Input + .Input(FakeInput(dtype)) // Filter + .Input(FakeInput(DT_INT32)) // Padding + .Input(FakeInput(DT_UINT8)) // MKl second tensor + .Input(FakeInput(DT_UINT8)) // MKl second tensor + .Input(FakeInput(DT_UINT8)) // MKl second tensor + .Attr("padding", "VALID") + .Attr("data_format", data_format) + .Attr("T", dtype) + .Attr("strides", {1, stride, stride, 1}) + .Attr("_kernel", "MklOp") + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + + // Setting up inputs and execute + AddInputFromArray(image.shape(), image.flat()); + AddInputFromArray(filter.shape(), filter.flat()); + AddInputFromArray(padding.shape(), padding.flat()); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + AddInputFromArray(dummy_shape, dummy_tensor); + TF_ASSERT_OK(RunOpKernel()); + + // Compare output to expected results + const Tensor& first = *GetOutput(0); + const Tensor& second = *GetOutput(2); + ConvMklToTF conv_comp; + conv_comp.ConvertAndCompare(dtype, first, second, expected); + } +}; + +TEST_F(FusedPadConvOpTest, PaddingConvTest) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + Tensor image(DT_FLOAT, {image_batch_count, image_height, image_width, depth}); + 
test::FillValues(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + + const int filter_size = 3; + const int filter_count = 1; + Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count}); + test::FillValues(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + + const int padding_height = 4; + const int padding_width = 2; + Tensor padding(DT_INT32, {padding_height, padding_width}); + test::FillValues(&padding, {0, 0, 3, 4, 1, 2, 0, 0}); + + Tensor expected(DT_FLOAT, TensorShape({1, 8, 5, 1})); + test::FillValues( + &expected, + {0, 0, 0, 0, 0, 24, 42, 60, 33, 12, 105, 150, 183, 95, + 32, 235, 312, 357, 178, 56, 187, 234, 261, 121, 32, 106, 126, 138, + 59, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); + + Run(DT_FLOAT, image, filter, padding, expected, "NHWC"); +} + +TEST_F(FusedPadConvOpTest, PaddingConvTestNchw) { + const int depth = 1; + const int image_width = 4; + const int image_height = 3; + const int image_batch_count = 1; + Tensor image(DT_FLOAT, {image_batch_count, depth, image_height, image_width}); + test::FillValues(&image, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); + + const int filter_size = 3; + const int filter_count = 1; + Tensor filter(DT_FLOAT, {filter_size, filter_size, depth, filter_count}); + test::FillValues(&filter, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + + const int padding_height = 4; + const int padding_width = 2; + Tensor padding(DT_INT32, {padding_height, padding_width}); + test::FillValues(&padding, {0, 0, 0, 0, 3, 4, 1, 2}); + + Tensor expected(DT_FLOAT, TensorShape({1, 1, 8, 5})); + test::FillValues( + &expected, + {0, 0, 0, 0, 0, 24, 42, 60, 33, 12, 105, 150, 183, 95, + 32, 235, 312, 357, 178, 56, 187, 234, 261, 121, 32, 106, 126, 138, + 59, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); + + Run(DT_FLOAT, image, filter, padding, expected, "NCHW"); +} +} // namespace tensorflow +#endif // INTEL_MKL_ML +#endif // INTEL_MKL -- GitLab From 7f94025fe72369117bf32d69156f0bd947402c96 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Fri, 10 Aug 2018 20:00:27 -0700 
Subject: [PATCH 0062/1078] Addressing the reviews for fused PAD and Conv2d PR --- tensorflow/core/graph/mkl_layout_pass.cc | 75 +++++++++---------- tensorflow/core/kernels/BUILD | 22 +++++- tensorflow/core/kernels/mkl_conv_ops.cc | 8 +- tensorflow/core/kernels/mkl_conv_ops.h | 12 ++- tensorflow/core/kernels/mkl_fused_ops_test.cc | 8 +- 5 files changed, 69 insertions(+), 56 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index d0abe5da35..1e85b50d99 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2786,50 +2786,48 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Find Pad or Conv2D node that can be merged with input node 'm'. // If input 'm' is Pad, then check if there exists Conv2D node that can be - // merged with 'm'. If input 'm' is Conv2D, then check if there exists BiasAdd + // merged with 'm'. If input 'm' is Conv2D, then check if there exists Pad // node that can be merged with 'm'. static Node* GetPadOrConv2D(const Node* m) { CHECK_NOTNULL(m); Node* n = nullptr; + const Node* conv_node; if (m->type_string() == csinfo_.pad) { // If m is Pad, then Conv2D is the output of Pad. for (const Edge* e : m->out_edges()) { if (!e->IsControlEdge() && e->dst()->type_string() == csinfo_.conv2d) { n = e->dst(); + conv_node = n; break; } } } else { CHECK_EQ(m->type_string(), csinfo_.conv2d); - // If m is conv2D, Go over all input edges + // If m is conv2D, Go over all input edges // and search for Pad Node. for (const Edge* e : m->in_edges()) { if (!e->IsControlEdge() && e->src()->type_string() == csinfo_.pad) { n = e->src(); + conv_node = m; break; } } } - // Check if only VALID type of padding is used - // or not. + // Check if only VALID type of padding is used + // or not. 
if (n != nullptr) { - const Node* conv_node; - if (m->type_string() == csinfo_.conv2d) - conv_node = m; - else - conv_node = n; string padding; TF_CHECK_OK(GetNodeAttr(conv_node->def(), "padding", &padding)); - if (padding != "VALID") - // Then do not merge. - // Only VALID type of padding in conv op can be + if (padding != "VALID") + // Then do not merge. + // Only VALID type of padding in conv op can be // merged with Pad op. n = nullptr; } - if (n == nullptr) { + else { VLOG(1) << "MklLayoutRewritePass: Could not find matching " << "Pad and Conv2D node for merging. Input node: " << m->DebugString(); @@ -3669,7 +3667,7 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); } -//used in rinfo when replacing __MklDummyPadWithConv2D by _MklPadWithConv2D +// Used in rinfo when replacing __MklDummyPadWithConv2D by _MklPadWithConv2D void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, NodeBuilder* nb) { DataType Tpaddings; @@ -3677,11 +3675,13 @@ void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, string data_format; string padding; std::vector strides; + std::vector dilations; bool use_cudnn_on_gpu; - // Get all attributes from old node 1. + // Get all attributes from old node. TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3691,13 +3691,14 @@ void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, // Add attributes to new node. 
nb->Attr("T", T); nb->Attr("strides", strides); + nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); nb->Attr("Tpaddings", Tpaddings); } -//used with MergePadWithConv2D +// Used with MergePadWithConv2D void MklLayoutRewritePass::CopyAttrsFromPadAndConv2D(const Node* orig_node1, const Node* orig_node2, NodeBuilder* nb) { DataType Tpaddings; @@ -3705,11 +3706,13 @@ void MklLayoutRewritePass::CopyAttrsFromPadAndConv2D(const Node* orig_node1, string data_format; string padding; std::vector strides; + std::vector dilations; bool use_cudnn_on_gpu; // Get all attributes from old node 1. TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node1->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3720,12 +3723,10 @@ void MklLayoutRewritePass::CopyAttrsFromPadAndConv2D(const Node* orig_node1, // Add attributes to new node. nb->Attr("T", T); nb->Attr("strides", strides); + nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); - - - // Add attributes to new node. nb->Attr("Tpaddings", Tpaddings); } void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, @@ -3954,7 +3955,7 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, // If 'm' is BiasAdd, then 'n' is Conv2D. Since Conv2D feeds BiasAdd, // BiasAdd is successor node, and Conv2D predecessor node. Node* pred = m->type_string() == csinfo_.bias_add ? n : m; - Node* succ = m->type_string() == csinfo_.bias_add ? m : n; + Node* succ = m->type_string() == csinfo_.bias_add ? m : n; // 1. Get all attributes from input nodes. 
DataType T_pred, T_succ; @@ -4095,11 +4096,10 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, Node* m, Node* n) { - CHECK_EQ(((m->type_string() == csinfo_.pad && + CHECK(((m->type_string() == csinfo_.pad && n->type_string() == csinfo_.conv2d)) || ((n->type_string() == csinfo_.pad && - m->type_string() == csinfo_.conv2d)), - true); + m->type_string() == csinfo_.conv2d))); // Conv2D is successor node, and Pad predecessor node. Node* pred = m->type_string() == csinfo_.pad ? m : n; @@ -4117,22 +4117,18 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, TF_CHECK_OK(GetNodeAttr(succ->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(succ->def(), "strides", &strides)); TF_CHECK_OK(GetNodeAttr(succ->def(), "dilations", &dilations)); - // data format for pad is not available and not necessary, thus - // we dont need to match data format - // TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); + // Data format for pad is not available and not necessary, thus + // dont need to match data format for Pad TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(succ->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); - // We check to ensure that data formats of both succ and pred are same. - // We expect them to be same, so we can enforce this as assert. - // But assert can be too strict, so we enforce this as a check. - // If the check fails, then we do not merge two nodes. - // We also do same check for devices. - // if (data_format_pred != data_format_succ || T_pred != T_succ || + // Check if the data types and devices of both succ and pred are the same. + // Assert is not used, because it can be too strict. + // Don't need to check for data formats because it is not available in Pad. 
if (T_pred != T_succ || pred->assigned_device_name() != succ->assigned_device_name() || pred->def().device() != succ->def().device()) { return Status(error::Code::INVALID_ARGUMENT, - "data_format or T attribute or devices of Conv2D and " + "T attribute or devices of Conv2D and " "Pad do not match. Will skip node merge optimization"); } @@ -4159,11 +4155,10 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, } } - // 2. Get inputs from both the nodes. ( ? ? Explanation of the following) - // Find the 2 inputs from the Pad and the Filter input from the Conv2D. - // Get operand 0, 1 of conv2D. - CHECK_EQ(pred->in_edges().size(), 2); // Pad must have 2 inputs. - // Get operand 1 of add_bias??? + // 2. Get inputs from both the nodes. + + // Pad must have 2 inputs: "input" and paddings. + CHECK_EQ(pred->in_edges().size(), 2); // Conv2D must have 2 inputs: pad output and Filter CHECK_EQ(succ->in_edges().size(), 2); @@ -4497,8 +4492,8 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { return nullptr; } - // We make an exception for __MklDummyConv2DWithBias and - // __MklConv2DBackpropFilterWithBias, __MklDummyPadWithConv2D since their names + // We make an exception for __MklDummyConv2DWithBias, + // __MklConv2DBackpropFilterWithBias, and __MklDummyPadWithConv2D since their names // do not match Mkl node names. 
if (n->type_string() != csinfo_.conv2d_with_bias && n->type_string() != csinfo_.pad_with_conv2d && diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index f14542068d..b057b78ace 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -22,6 +22,7 @@ package_group( "//learning/brain/research/sparse_matrix/...", "//learning/faster_training/...", "//tensorflow/...", + "//third_party/car/...", ], ) @@ -783,7 +784,7 @@ tf_kernel_library( tf_kernel_library( name = "quantize_and_dequantize_op", prefix = "quantize_and_dequantize_op", - deps = ARRAY_DEPS, + deps = ARRAY_DEPS + [":cwise_op"], ) tf_kernel_library( @@ -1130,7 +1131,6 @@ tf_cuda_cc_test( ], ) - tf_cc_test( name = "decode_wav_op_test", size = "small", @@ -2855,6 +2855,8 @@ tf_kernel_library( srcs = [] + if_mkl([ "mkl_batch_matmul_op.cc", ]), + # *impl.h are excluded by default from the CPU build, add explicitly. + hdrs = ["batch_matmul_op_impl.h"], # Override EIGEN_STRONG_INLINE to inline when --define=override_eigen_strong_inline=true, # to avoid long compiling time. 
See https://github.com/tensorflow/tensorflow/issues/10521 copts = if_override_eigen_strong_inline(["/DEIGEN_STRONG_INLINE=inline"]), @@ -3791,7 +3793,7 @@ tf_kernel_library( "spacetodepth_op.h", "spacetodepth_op_gpu.cu.cc", ], - visibility = ["//visibility:private"], + visibility = [":friends"], deps = [ "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -4888,6 +4890,7 @@ filegroup( "fill_functor.cc", "fill_functor.h", "function_ops.cc", + "function_ops.h", "gather_functor.h", "gather_nd_op.cc", "gather_nd_op.h", @@ -5379,6 +5382,18 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "android_whole_file_read_ops", + srcs = if_android(["whole_file_read_ops.cc"]), + copts = tf_copts(), + linkopts = ["-ldl"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:android_tensorflow_lib_lite", + ], + alwayslink = 1, +) + # Quantization-specific OpKernels tf_kernel_library( @@ -6126,7 +6141,6 @@ tf_mkl_kernel_library( ] + if_mkl(["@mkl_dnn"]), ) - tf_mkl_kernel_library( name = "mkl_tfconv_op", prefix = "mkl_tfconv", diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index d4ec831cf2..b5ae312fa5 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -979,9 +979,11 @@ class MklConv2DOp : public OpKernel { // if the paddings_tf is [ [0, 0] [1,2] [3,4] [0,0] ] // paddings = {0, 0, 1, 2, 3, 4, 0, 0} ; flat method is row major // then, values are: top = 1, bottom =2, left=3, right=4 - // For NCHW format, + // For NCHW format: // paddings[0], paddings[1], paddings[2], paddings[3] should be zero // similar explanation as NHWC format will apply. 
+ int64 pad_top, pad_left; + int64 pad_bottom, pad_right; string data_format = ToString(data_format_); if(data_format == "NHWC"){ pad_top = paddings[2]; @@ -1004,8 +1006,6 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; std::vector dilations_; - int64 pad_top, pad_left; - int64 pad_bottom, pad_right; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; @@ -1139,7 +1139,7 @@ class MklConv2DOp : public OpKernel { MklDummyOp); TF_CALL_float(REGISTER_MKL_CPU); -#endif +#endif // INTEL_MKL_ML } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index c6487a4512..aae4d767a2 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -281,6 +281,14 @@ class MklDnnConvUtil { out_rows = out_rows + (pad_top + pad_bottom ) / stride_rows; out_cols = out_cols + (pad_left + pad_right ) / stride_cols; } + // Handle padding. MKL-DNN uses asymetric padding. + // But, if padEnabled, i.e., pad and conv op are fused, + // then, *pad_l and *pad_r are already set from pad op. + // In that case they need not set here. + else { + *pad_l = {static_cast(pad_top), static_cast(pad_left)}; + *pad_r = {static_cast(pad_bottom), static_cast(pad_right)}; + } // Tensorflow output is in data_format order. (NHWC or NCHW) TensorShape out_shape = ShapeFromFormat(data_format_, out_batch, out_rows, out_cols, out_depth); @@ -297,10 +305,6 @@ class MklDnnConvUtil { // Now handle padding. MKL-DNN uses asymetric padding. // But, if padEnabled, i.e., pad and conv op are fused, // then, *pad_l and *pad_r are already set from pad op - if(!padEnabled) { - *pad_l = {static_cast(pad_top), static_cast(pad_left)}; - *pad_r = {static_cast(pad_bottom), static_cast(pad_right)}; - } } // Calculate output and pad size of forward Convolution operator. 
diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index 216e8d0206..e408886861 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,11 +33,11 @@ limitations under the License. namespace tensorflow { -// Helper class for converting MKL tesnors to TF tensor and comparing to +// Helper class for converting MKL tesnors to TF tensors and comparing to // expected values -const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0}; -const TensorShape dummy_shape({8}); +static const uint8 dummy_tensor[] = {0, 0, 0, 0, 0, 0, 0, 0}; +static const TensorShape dummy_shape({8}); class ConvMklToTF : public OpsTestBase { public: -- GitLab From 819afabbeda709a94894c894515b62c85d236d50 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 13 Aug 2018 10:44:36 -0700 Subject: [PATCH 0063/1078] modifying the ifdef INTEL_MKL_ML to INTEL_MKL_ML_ONLY --- tensorflow/core/kernels/mkl_fused_ops_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index e408886861..900325ac91 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #ifdef INTEL_MKL -#ifndef INTEL_MKL_ML // We don't support fusion in MKL ML +#ifndef INTEL_MKL_ML_ONLY // We don't support fusion in MKL ML #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" @@ -160,5 +160,5 @@ TEST_F(FusedPadConvOpTest, PaddingConvTestNchw) { Run(DT_FLOAT, image, filter, padding, expected, "NCHW"); } } // namespace tensorflow -#endif // INTEL_MKL_ML +#endif // INTEL_MKL_ML_ONLY #endif // INTEL_MKL -- GitLab From 6b292c27c7ad09a89c8b75c2505e6472b533a4e1 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 13 Aug 2018 15:03:18 -0700 Subject: [PATCH 0064/1078] formatted as Clang format for Google code compliance, replaced directive INTEL_MKL_ML by INTEL_MKL_ML_ONLY, and merged with master --- tensorflow/core/graph/mkl_layout_pass.cc | 69 +++++++++---------- tensorflow/core/graph/mkl_layout_pass_test.cc | 19 ++--- tensorflow/core/kernels/mkl_conv_ops.cc | 47 +++++++------ tensorflow/core/kernels/mkl_conv_ops.h | 42 +++++------ 4 files changed, 86 insertions(+), 91 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 65b999b193..84e8ea8f70 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2551,10 +2551,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad, csinfo_.conv2d_grad_filter_with_bias, GetConv2DBackpropFilterOrBiasAddGrad}); - minfo_.push_back({csinfo_.pad, csinfo_.conv2d, - csinfo_.pad_with_conv2d, GetPadOrConv2D}); - //TODO : Need to check if pad is with zero or not - // if is zero then replace, if not then do not replace + minfo_.push_back( + {csinfo_.pad, csinfo_.conv2d, csinfo_.pad_with_conv2d, GetPadOrConv2D}); + // Merge Pad and Conv2d, only if the pad op is "Pad" + // Doesn't merge if pad op is 
"PadV2" or "MirrorPad" } // Standard interface to run pass @@ -2792,42 +2792,39 @@ class MklLayoutRewritePass : public GraphOptimizationPass { CHECK_NOTNULL(m); Node* n = nullptr; - const Node* conv_node; + const Node* conv_node; if (m->type_string() == csinfo_.pad) { // If m is Pad, then Conv2D is the output of Pad. for (const Edge* e : m->out_edges()) { - if (!e->IsControlEdge() && - e->dst()->type_string() == csinfo_.conv2d) { + if (!e->IsControlEdge() && e->dst()->type_string() == csinfo_.conv2d) { n = e->dst(); - conv_node = n; + conv_node = n; break; } } } else { CHECK_EQ(m->type_string(), csinfo_.conv2d); - // If m is conv2D, Go over all input edges + // If m is conv2D, Go over all input edges // and search for Pad Node. for (const Edge* e : m->in_edges()) { - if (!e->IsControlEdge() && - e->src()->type_string() == csinfo_.pad) { + if (!e->IsControlEdge() && e->src()->type_string() == csinfo_.pad) { n = e->src(); - conv_node = m; + conv_node = m; break; } } } - // Check if only VALID type of padding is used - // or not. + // Check if only VALID type of padding is used + // or not. if (n != nullptr) { string padding; TF_CHECK_OK(GetNodeAttr(conv_node->def(), "padding", &padding)); - if (padding != "VALID") - // Then do not merge. - // Only VALID type of padding in conv op can be + if (padding != "VALID") + // Then do not merge. + // Only VALID type of padding in conv op can be // merged with Pad op. n = nullptr; - } - else { + } else { VLOG(1) << "MklLayoutRewritePass: Could not find matching " << "Pad and Conv2D node for merging. 
Input node: " << m->DebugString(); @@ -3155,7 +3152,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsPadWithConv2D(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsFromPadAndConv2D(const Node* orig_node1, const Node* orig_node2, + static void CopyAttrsFromPadAndConv2D(const Node* orig_node1, + const Node* orig_node2, NodeBuilder* nb); static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb); @@ -3356,7 +3354,7 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // 2nd input (slot 1) of _MklConv2D and _MklConv2DWithBias. for (const Edge* e : filter_node->out_edges()) { if ((e->dst()->type_string() == csinfo_.mkl_conv2d || - // add check for mkl_pad_with_conv2d + // add check for mkl_pad_with_conv2d e->dst()->type_string() == csinfo_.mkl_pad_with_conv2d || e->dst()->type_string() == csinfo_.mkl_conv2d_with_bias) && e->dst_input() == kConv2DFilterInputSlotIdx @@ -3669,7 +3667,7 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, // Used in rinfo when replacing __MklDummyPadWithConv2D by _MklPadWithConv2D void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb) { DataType Tpaddings; DataType T; string data_format; @@ -3700,7 +3698,8 @@ void MklLayoutRewritePass::CopyAttrsPadWithConv2D(const Node* orig_node, // Used with MergePadWithConv2D void MklLayoutRewritePass::CopyAttrsFromPadAndConv2D(const Node* orig_node1, - const Node* orig_node2, NodeBuilder* nb) { + const Node* orig_node2, + NodeBuilder* nb) { DataType Tpaddings; DataType T; string data_format; @@ -4095,12 +4094,12 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, } Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, - Node* 
m, Node* n) { + Node* m, Node* n) { CHECK(((m->type_string() == csinfo_.pad && - n->type_string() == csinfo_.conv2d)) || - ((n->type_string() == csinfo_.pad && - m->type_string() == csinfo_.conv2d))); - + n->type_string() == csinfo_.conv2d)) || + ((n->type_string() == csinfo_.pad && + m->type_string() == csinfo_.conv2d))); + // Conv2D is successor node, and Pad predecessor node. Node* pred = m->type_string() == csinfo_.pad ? m : n; Node* succ = m->type_string() == csinfo_.pad ? n : m; @@ -4158,7 +4157,7 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // 2. Get inputs from both the nodes. // Pad must have 2 inputs: "input" and paddings. - CHECK_EQ(pred->in_edges().size(), 2); + CHECK_EQ(pred->in_edges().size(), 2); // Conv2D must have 2 inputs: pad output and Filter CHECK_EQ(succ->in_edges().size(), 2); @@ -4174,8 +4173,8 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, nb.Input(pred_in[1].first, pred_in[1].second); // In2 (paddings) of Pad // Copy attributes from Pad and conv2D to PadWithConv2D. - CopyAttrsFromPadAndConv2D(const_cast(succ), const_cast(pred), - &nb); + CopyAttrsFromPadAndConv2D(const_cast(succ), + const_cast(pred), &nb); // Copy the device assigned to old node to new node. nb.Device(succ->def().device()); @@ -4186,7 +4185,7 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, CHECK_NOTNULL(new_node); // Incoming data edges from 'pred' node and 'succ' node to new 'new_node' - // node are already copied in BuildNode. + // node are already copied in BuildNode. // We handle control edges now. for (const Edge* e : pred->in_edges()) { if (e->IsControlEdge()) { @@ -4493,10 +4492,10 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { } // We make an exception for __MklDummyConv2DWithBias, - // __MklConv2DBackpropFilterWithBias, and __MklDummyPadWithConv2D since their names - // do not match Mkl node names. 
+ // __MklConv2DBackpropFilterWithBias, and __MklDummyPadWithConv2D since their + // names do not match Mkl node names. if (n->type_string() != csinfo_.conv2d_with_bias && - n->type_string() != csinfo_.pad_with_conv2d && + n->type_string() != csinfo_.pad_with_conv2d && n->type_string() != csinfo_.conv2d_grad_filter_with_bias && !mkl_op_registry::IsMklOp(mkl_op_registry::GetMklOpName(n->type_string()), T)) { diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 2925b1bde0..d1b39ceeca 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -2013,11 +2013,11 @@ TEST_F(MklLayoutPassTest, Basic) { } // Test set 0: Pad + Conv2D; padding is VALID -// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// A = input(image), B = input(paddings), C= Pad = input of conv2D, // D=input(filter), E = Conv2D, Z = Zeta // C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) // After layout pass -// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); InitGraph( @@ -2049,10 +2049,10 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { } // Test set 0: Pad + Conv2D; padding is SAME -// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// A = input(image), B = input(paddings), C= Pad = input of conv2D, // D=input(filter), E = Conv2D, Z = Zeta // C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) -// After layout pass - No merging +// After layout pass - No merging TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Negative) { CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); InitGraph( @@ -2075,11 +2075,12 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Negative) { "node { name: 'Z' op: 'Zeta'" " attr {key: 'T' value { type: DT_FLOAT } }" " input: ['E', 
'Y']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Pad);D(Input);DMT/_0(Const);DMT/_1(Const);" - "E(_MklConv2D);Y(Input);Z(Zeta)|A->C;B->C:1;C->E;" - "C:control->DMT/_0:control;C:control->DMT/_1:control;" - "D->E:1;DMT/_0->E:2;DMT/_1->E:3;E->Z;Y->Z:1"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);C(Pad);D(Input);DMT/_0(Const);DMT/_1(Const);" + "E(_MklConv2D);Y(Input);Z(Zeta)|A->C;B->C:1;C->E;" + "C:control->DMT/_0:control;C:control->DMT/_1:control;" + "D->E:1;DMT/_0->E:2;DMT/_1->E:3;E->Z;Y->Z:1"); } // Test set 1: Conv2D + AddBias diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 8c8be197f9..7ee9f66810 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -963,39 +963,38 @@ class MklConv2DOp : public OpKernel { errors::Aborted("Operation received an exception:", error_msg)); } } - - void PadWithConvFusion(OpKernelContext* context, memory::dims &padding_left, - memory::dims &padding_right){ + + void PadWithConvFusion(OpKernelContext* context, memory::dims& padding_left, + memory::dims& padding_right) { const Tensor& paddings_tf = MklGetInput(context, 2); OP_REQUIRES(context, paddings_tf.dims() == 2, errors::InvalidArgument("paddings must be 2-dimensional: ", paddings_tf.shape().DebugString())); Tpadding* paddings = nullptr; // To get individual pad, need to flatten the tensor - paddings = static_cast(const_cast - (paddings_tf.flat().data())); + paddings = static_cast( + const_cast(paddings_tf.flat().data())); // For NHWC format: - // paddings[0], paddings[1], paddings[6], paddings[7] should be zero + // paddings[0], paddings[1], paddings[6], paddings[7] should be zero // if the paddings_tf is [ [0, 0] [1,2] [3,4] [0,0] ] // paddings = {0, 0, 1, 2, 3, 4, 0, 0} ; flat method is row major // then, values are: top = 1, bottom =2, left=3, right=4 - // For NCHW format: - // paddings[0], paddings[1], paddings[2], 
paddings[3] should be zero + // For NCHW format: + // paddings[0], paddings[1], paddings[2], paddings[3] should be zero // similar explanation as NHWC format will apply. - int64 pad_top, pad_left; - int64 pad_bottom, pad_right; + int64 pad_top, pad_left; + int64 pad_bottom, pad_right; string data_format = ToString(data_format_); - if(data_format == "NHWC"){ - pad_top = paddings[2]; - pad_bottom = paddings[3]; - pad_left = paddings[4]; - pad_right = paddings[5]; - } - else if (data_format == "NCHW"){ - pad_top = paddings[4]; - pad_bottom = paddings[5]; - pad_left = paddings[6]; - pad_right = paddings[7]; + if (data_format == "NHWC") { + pad_top = paddings[2]; + pad_bottom = paddings[3]; + pad_left = paddings[4]; + pad_right = paddings[5]; + } else if (data_format == "NCHW") { + pad_top = paddings[4]; + pad_bottom = paddings[5]; + pad_left = paddings[6]; + pad_right = paddings[7]; } // Create padding arrays for MKL DNN convolutions. // MKL-DNN uses asymetric padding. @@ -1124,13 +1123,13 @@ class MklConv2DOp : public OpKernel { .TypeConstraint("T") \ .TypeConstraint("Tpaddings") \ .Label(mkl_op_registry::kMklOpLabel), \ - MklConv2DOp); \ + MklConv2DOp); \ REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ .TypeConstraint("Tpaddings") \ .Label(mkl_op_registry::kMklOpLabel), \ - MklConv2DOp); \ + MklConv2DOp); \ REGISTER_KERNEL_BUILDER(Name("__MklDummyPadWithConv2D") \ .Device(DEVICE_CPU) \ .TypeConstraint("T") \ @@ -1139,7 +1138,7 @@ class MklConv2DOp : public OpKernel { MklDummyOp); TF_CALL_float(REGISTER_MKL_CPU); -#endif // INTEL_MKL_ML +#endif // INTEL_MKL_ML_ONLY } // namespace tensorflow #endif // INTEL_MKL diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 3955bd919d..cd24ae02c4 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -230,9 +230,8 @@ class MklDnnConvUtil { virtual inline void GetOutputAndPadSizeInMklOrder( 
const TensorShape& input_shape, const TensorShape& filter_shape, const memory::dims& strides, const memory::dims& dilations, - memory::dims* output_dims_tf_order, - memory::dims* output_dims_mkl_order, memory::dims* pad_l, - memory::dims* pad_r, bool padEnabled=false) { + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, + memory::dims* pad_l, memory::dims* pad_r, bool padEnabled = false) { CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -269,17 +268,17 @@ class MklDnnConvUtil { dilation_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // If padEnabled, i.e., pad and conv op are fused, then - // all pads are already passed from pad op through - // *pad_l and *pad_r - if(padEnabled) { - pad_top = static_cast((*pad_l)[0]); - pad_left = static_cast((*pad_l)[1]); - pad_bottom = static_cast((*pad_r)[0]); - pad_right = static_cast((*pad_r)[1]); - // update the out_rows and out_cols based on all - // sides of the pads coming from pad op. - out_rows = out_rows + (pad_top + pad_bottom ) / stride_rows; - out_cols = out_cols + (pad_left + pad_right ) / stride_cols; + // all pads are already passed from pad op through + // *pad_l and *pad_r + if (padEnabled) { + pad_top = static_cast((*pad_l)[0]); + pad_left = static_cast((*pad_l)[1]); + pad_bottom = static_cast((*pad_r)[0]); + pad_right = static_cast((*pad_r)[1]); + // update the out_rows and out_cols based on all + // sides of the pads coming from pad op. + out_rows = out_rows + (pad_top + pad_bottom) / stride_rows; + out_cols = out_cols + (pad_left + pad_right) / stride_cols; } // Handle padding. MKL-DNN uses asymetric padding. 
// But, if padEnabled, i.e., pad and conv op are fused, @@ -342,10 +341,9 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims *dilations, - memory::dims* output_dims_tf_order, - memory::dims* output_dims_mkl_order, memory::dims* pad_l, - memory::dims* pad_r, bool padEnabled=false) { + memory::dims* strides, memory::dims* dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, + memory::dims* pad_l, memory::dims* pad_r, bool padEnabled = false) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); @@ -361,15 +359,13 @@ class MklDnnConvUtil { if (!context_->status().ok()) return; GetStridesInMklOrder(strides); GetDilationsInMklOrder(dilations); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, - *strides, *dilations, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r, padEnabled); + GetOutputAndPadSizeInMklOrder( + input_shape, filter_shape, *strides, *dilations, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r, padEnabled); if (!context_->status().ok()) return; } }; - ///////////////////////////////////////////////////////////////////// /// Common class that implements Conv2DBackpropFilter and Input ///////////////////////////////////////////////////////////////////// -- GitLab From 53f2aefe86f8b50addd4b67eb20eb91135b1fac7 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Tue, 14 Aug 2018 13:23:16 -0700 Subject: [PATCH 0065/1078] fixed, so that now not allowing duplicate control edges, alos cleaning up the comments --- tensorflow/core/graph/mkl_layout_pass.cc | 15 ++++++--------- tensorflow/core/kernels/mkl_conv_ops.h | 4 ---- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 84e8ea8f70..9157080330 100644 --- 
a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -4189,16 +4189,14 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // We handle control edges now. for (const Edge* e : pred->in_edges()) { if (e->IsControlEdge()) { - // Allow duplicate while adding control edge as it would fail (return - // NULL) if we try to add duplicate edge. - CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true)); + //Don't allow duplicate edge + (*g)->AddControlEdge(e->src(), new_node, false); } } for (const Edge* e : succ->in_edges()) { if (e->IsControlEdge()) { - // Allow duplicate while adding control edge as it would fail (return - // NULL) if we try to add duplicate edge. - CHECK_NOTNULL((*g)->AddControlEdge(e->src(), new_node, true)); + //Don't allow duplicate edge + (*g)->AddControlEdge(e->src(), new_node, false); } } @@ -4206,9 +4204,8 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // First, we will fix outgoing control edges from 'pred' node. for (const Edge* e : pred->out_edges()) { if (e->IsControlEdge()) { - // Allow duplicate while adding control edge as it would fail (return - // NULL) if we try to add duplicate edge. - CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true)); + //Don't allow duplicate edge + (*g)->AddControlEdge(new_node, e->dst(), false); } } diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index cd24ae02c4..ebaf1a9947 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -300,10 +300,6 @@ class MklDnnConvUtil { mkldnn_sizes[MklDnnDims::Dim_H] = static_cast(out_rows); mkldnn_sizes[MklDnnDims::Dim_W] = static_cast(out_cols); *output_dims_mkl_order = mkldnn_sizes; - - // Now handle padding. MKL-DNN uses asymetric padding. 
- // But, if padEnabled, i.e., pad and conv op are fused, - // then, *pad_l and *pad_r are already set from pad op } // Calculate output and pad size of forward Convolution operator. -- GitLab From f6c9e054a042bf0f518a740380f3f96a28e8c5be Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Fri, 17 Aug 2018 12:45:06 -0700 Subject: [PATCH 0066/1078] not allowing duplicate edges, and, add two unit tests in mkl_layout_pass_test to test if common input and common output of pad an conv2D work correctly for pad+conv2D fusion --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- tensorflow/core/graph/mkl_layout_pass_test.cc | 212 ++++++++++++------ 2 files changed, 143 insertions(+), 71 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 9157080330..6d99e57417 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -4214,7 +4214,7 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, if (e->IsControlEdge()) { // Allow duplicate while adding control edge as it would fail (return // NULL) if we try to add duplicate edge. - CHECK_NOTNULL((*g)->AddControlEdge(new_node, e->dst(), true)); + (*g)->AddControlEdge(new_node, e->dst(), false); } else { // Conv2D has only 1 output (at slot 0) and merged node also has only 1 // output (at slot 0). 
diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index d1b39ceeca..248520a7f4 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -1994,6 +1994,10 @@ REGISTER_OP("_MklInput2") .Output("o: uint8") .Output("o1: uint8") .SetIsStateful(); +REGISTER_OP("Output2") + .Input("i: float") + .Input("i1: float") + .SetIsStateful(); ///////////////////////////////////////////////////////////////////// // Unit tests related to node merge optiimization @@ -2012,76 +2016,6 @@ TEST_F(MklLayoutPassTest, Basic) { "A->C;A->D;B->C:1;B->D:1"); } -// Test set 0: Pad + Conv2D; padding is VALID -// A = input(image), B = input(paddings), C= Pad = input of conv2D, -// D=input(filter), E = Conv2D, Z = Zeta -// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) -// After layout pass -// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) -TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Pad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tpaddings' value { type: DT_INT32 } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'VALID' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - 
"DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" - "DMT/_2->E:5;E->Z;Y->Z:1"); -} - -// Test set 0: Pad + Conv2D; padding is SAME -// A = input(image), B = input(paddings), C= Pad = input of conv2D, -// D=input(filter), E = Conv2D, Z = Zeta -// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) -// After layout pass - No merging -TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Negative) { - CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); - InitGraph( - "node { name: 'A' op: 'Input'}" - "node { name: 'B' op: 'Int32Input'}" - "node { name: 'C' op: 'Pad'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'Tpaddings' value { type: DT_INT32 } }" - " input: ['A', 'B']}" - "node { name: 'D' op: 'Input'}" - "node { name: 'E' op: 'Conv2D'" - " attr { key: 'T' value { type: DT_FLOAT } }" - " attr { key: 'data_format' value { s: 'NHWC' } }" - " attr { key: 'use_cudnn_on_gpu' value { b: false } }" - " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" - " attr { key: 'padding' value { s: 'SAME' } }" - " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" - " input: ['C', 'D'] }" - "node { name: 'Y' op: 'Input'}" - "node { name: 'Z' op: 'Zeta'" - " attr {key: 'T' value { type: DT_FLOAT } }" - " input: ['E', 'Y']}"); - EXPECT_EQ( - DoMklLayoutOptimizationPass(), - "A(Input);B(Int32Input);C(Pad);D(Input);DMT/_0(Const);DMT/_1(Const);" - "E(_MklConv2D);Y(Input);Z(Zeta)|A->C;B->C:1;C->E;" - "C:control->DMT/_0:control;C:control->DMT/_1:control;" - "D->E:1;DMT/_0->E:2;DMT/_1->E:3;E->Z;Y->Z:1"); -} // Test set 1: Conv2D + AddBias @@ -2389,6 +2323,144 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) { "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1"); } +// Test set 3: Pad + Conv2D fusion +// padding is VALID type +// A = input(image), B = input(paddings), C= 
Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// After layout pass +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + "A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;Y->Z:1"); +} +// Pad + Conv2D fusion with padding is VALID, +// Input node pointing to both Pad and Conv2D +// A = input(image), B = input(paddings), C= Pad +// E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,A); Z=Zeta(E,Y) +// After layout pass +// _MklPadWithConv2D(A, A, B, DMT/_0, DMT/_1, DMT/_2) +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Common_Input) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' 
op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'A'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;A->E:1;" + "A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;Y->Z:1"); +} +// Pad + Conv2D with padding is VALID, +// Input node pointing to both Pad and Conv2D +// Output of both Pad and Conv2D feeds one node (Z as Output2) +// A = input(as image), B = input(as paddings), C= Pad +// E = Conv2D, Z = Output2 +// C=Pad(A,B); E=Conv2D(C,A); Z=Output(C,E) +// After layout pass - No merging, since Pad and Conv2D both +// feed to the same node (Z) +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Common_InOutput) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, 
i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'A'] }" + "node { name: 'Z' op: 'Output2'" + " input: ['C', 'E']}"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);C(Pad);DMT/_0(Const);DMT/_1(Const);" + "E(_MklConv2D);Z(Output2)|A->C;A->E:1;B->C:1;C->E;C->Z;" + "C:control->DMT/_0:control;C:control->DMT/_1:control;" + "DMT/_0->E:2;DMT/_1->E:3;E->Z:1"); +} +// Pad + Conv2D; padding is SAME +// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// After layout pass - No merging +TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Negative) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'SAME' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);B(Int32Input);C(Pad);D(Input);DMT/_0(Const);DMT/_1(Const);" + "E(_MklConv2D);Y(Input);Z(Zeta)|A->C;B->C:1;C->E;" + "C:control->DMT/_0:control;C:control->DMT/_1:control;" + "D->E:1;DMT/_0->E:2;DMT/_1->E:3;E->Z;Y->Z:1"); +} 
///////////////////////////////////////////////////////////////////// // Unit tests related to rewriting node to Mkl node ///////////////////////////////////////////////////////////////////// -- GitLab From f8ec0f101bac066faa2e917ac714ca9eea310eac Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Thu, 13 Sep 2018 22:40:49 -0700 Subject: [PATCH 0067/1078] adding checks that pad fusion works only Conv2D --- tensorflow/core/kernels/mkl_conv_ops.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 54670c8521..4b54ce1d52 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -900,7 +900,10 @@ class MklConvOp : public OpKernel { bool isConv2D = (strides_.size() == 4); // TODO(Intel-tf) Add check to make sure padEnabled is true only for 2D - + if(!isConv2D){ + OP_REQUIRES(context, padEnabled, + errors::InvalidArgument("Pad+Conv fusion only works for 2D")); + } // Create memory for user data. // Describe how the inputs and outputs of Convolution look like. Also // specify buffers containing actual input and output data. -- GitLab From 0e87ed82815053b4f1c038975382d72282fdf97f Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Sun, 16 Sep 2018 11:20:26 -0700 Subject: [PATCH 0068/1078] Adding two unit tests for pad+conv2d fusion. They test if the two merging ops get control edge from a common op, then the merged node will have only one control edge. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 21 +++- tensorflow/core/graph/mkl_layout_pass_test.cc | 111 ++++++++++++++++++ 2 files changed, 128 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index ef8a2b0838..d3a4112ee9 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -4192,10 +4192,23 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // 2. Get inputs from both the nodes. - // Pad must have 2 inputs: "input" and paddings. - CHECK_EQ(pred->in_edges().size(), 2); - // Conv2D must have 2 inputs: pad output and Filter - CHECK_EQ(succ->in_edges().size(), 2); + // Pad must have 2 data inputs: "input" and paddings. + int PadDataInputEdges = 0; + for (const Edge* e : pred->in_edges()) { + if (!e->IsControlEdge()) { + PadDataInputEdges++; + } + } + CHECK_EQ(PadDataInputEdges, 2); + + // Conv2D must have 2 data inputs: pad output and Filter + int ConvDataInputEdges = 0; + for (const Edge* e : succ->in_edges()) { + if (!e->IsControlEdge()) { + ConvDataInputEdges++; + } + } + CHECK_EQ(ConvDataInputEdges, 2); // We will use the node name of Conv2D as the name of new node // Build new node. 
We use same name as original node, but change the op diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 248520a7f4..e9e234010c 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -1928,6 +1928,13 @@ static void InitGraph(const string& s, Graph* graph, class MklLayoutPassTest : public ::testing::Test { public: MklLayoutPassTest() : graph_(OpRegistry::Global()) {} + // Return Node* from the Node Name + Node* FindNode(const string& name) { + for (Node* node : graph_.nodes()) { + if (node->name() == name) return node; + } + LOG(FATAL) << name; + } void InitGraph(const string& s, const string& device = kCPUDevice) { ::tensorflow::InitGraph(s, &graph_, device); @@ -1998,6 +2005,9 @@ REGISTER_OP("Output2") .Input("i: float") .Input("i1: float") .SetIsStateful(); +REGISTER_OP("Output") + .Input("i: float") + .SetIsStateful(); ///////////////////////////////////////////////////////////////////// // Unit tests related to node merge optiimization @@ -2359,6 +2369,107 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" "DMT/_2->E:5;E->Z;Y->Z:1"); } +// Test if input control edges do not duplicate after merge. +// If both the merging ops have input control edge from a common op +// then, the merged op will have only one control edge from that +// common op. 
+// padding is VALID type +// A = input(image), A1 = input, B = input(paddings), +// C= Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// C:control->A1:control +// E:control->A1:control +// After layout pass +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +// A1:control->E:control +TEST_F(MklLayoutPassTest, Output_ControlEdge_PadWithConv2D_Positive) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A1' op: 'Input'}" + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + Node* a1 = FindNode("A1"); + Node* c = FindNode("C"); + Node* e = FindNode("E"); + const Edge* edge = graph_.AddControlEdge(a1, c); + const Edge* edge_1 = graph_.AddControlEdge(a1, e); + ASSERT_TRUE(edge != nullptr); + ASSERT_TRUE(edge_1 != nullptr); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + "A1:control->E:control;A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;Y->Z:1"); +} +// Test if output control edges 
does not duplicate after merge. +// If both the merging ops have output control edge to a common op, +// then after merge, the merged op will have only one control edge +// to that commom op. +// padding is VALID type +// A = input(image), B = input(paddings), C= Pad = input of conv2D, +// D=input(filter), E = Conv2D, Z = Zeta +// C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) +// C:control->A1:control +// E:control->A1:control +// After layout pass +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +// E:control->A1:control (only one control edge) +TEST_F(MklLayoutPassTest, ControlEdge_PadWithConv2D_Positive) { + CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); + InitGraph( + "node { name: 'A1' op: 'Input'}" + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Int32Input'}" + "node { name: 'C' op: 'Pad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'Tpaddings' value { type: DT_INT32 } }" + " input: ['A', 'B']}" + "node { name: 'D' op: 'Input'}" + "node { name: 'E' op: 'Conv2D'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'data_format' value { s: 'NHWC' } }" + " attr { key: 'use_cudnn_on_gpu' value { b: false } }" + " attr { key: 'strides' value { list: {i: 1, i:1, i:1, i:1} } }" + " attr { key: 'padding' value { s: 'VALID' } }" + " attr { key: 'dilations' value { list: {i: 1, i:1, i:1, i:1} } }" + " input: ['C', 'D'] }" + "node { name: 'Y' op: 'Input'}" + "node { name: 'Z' op: 'Zeta'" + " attr {key: 'T' value { type: DT_FLOAT } }" + " input: ['E', 'Y']}"); + Node* a1 = FindNode("A1"); + Node* c = FindNode("C"); + Node* e = FindNode("E"); + const Edge* edge = graph_.AddControlEdge(c, a1); + const Edge* edge_1 = graph_.AddControlEdge(e, a1); + ASSERT_TRUE(edge != nullptr); + ASSERT_TRUE(edge_1 != nullptr); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + 
"A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;E:control->A1:control;Y->Z:1"); +} // Pad + Conv2D fusion with padding is VALID, // Input node pointing to both Pad and Conv2D // A = input(image), B = input(paddings), C= Pad -- GitLab From 4e140eed6b4f6722b94cf85432d4519b8c5ce0bf Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 17 Sep 2018 10:05:40 -0700 Subject: [PATCH 0069/1078] changing the name of the unit tests --- tensorflow/core/graph/mkl_layout_pass_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index e9e234010c..9ad45a2cfd 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -1928,7 +1928,7 @@ static void InitGraph(const string& s, Graph* graph, class MklLayoutPassTest : public ::testing::Test { public: MklLayoutPassTest() : graph_(OpRegistry::Global()) {} - // Return Node* from the Node Name + // Ashraf added Node* FindNode(const string& name) { for (Node* node : graph_.nodes()) { if (node->name() == name) return node; @@ -2383,7 +2383,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { // After layout pass // _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) // A1:control->E:control -TEST_F(MklLayoutPassTest, Output_ControlEdge_PadWithConv2D_Positive) { +TEST_F(MklLayoutPassTest, Input_ControlEdge_PadWithConv2D_Positive) { CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); InitGraph( "node { name: 'A1' op: 'Input'}" -- GitLab From 2a8f7bcc59bc4e36ea88f4187028b4461f5f1072 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Tue, 18 Sep 2018 11:28:21 -0700 Subject: [PATCH 0070/1078] minor change in the two unit tests --- tensorflow/core/graph/mkl_layout_pass_test.cc | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git 
a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 9ad45a2cfd..60a7f138c8 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -2378,11 +2378,11 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { // C= Pad = input of conv2D, // D=input(filter), E = Conv2D, Z = Zeta // C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) -// C:control->A1:control -// E:control->A1:control -// After layout pass -// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +// A1:control->C:control // A1:control->E:control +// After layout pass: +// _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) +// A1:control->E:control (only one control edge) TEST_F(MklLayoutPassTest, Input_ControlEdge_PadWithConv2D_Positive) { CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); InitGraph( @@ -2411,8 +2411,8 @@ TEST_F(MklLayoutPassTest, Input_ControlEdge_PadWithConv2D_Positive) { Node* e = FindNode("E"); const Edge* edge = graph_.AddControlEdge(a1, c); const Edge* edge_1 = graph_.AddControlEdge(a1, e); - ASSERT_TRUE(edge != nullptr); - ASSERT_TRUE(edge_1 != nullptr); + ASSERT_NE(edge, nullptr); + ASSERT_NE(edge_1, nullptr); EXPECT_EQ(DoMklLayoutOptimizationPass(), "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" @@ -2430,10 +2430,10 @@ TEST_F(MklLayoutPassTest, Input_ControlEdge_PadWithConv2D_Positive) { // C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) // C:control->A1:control // E:control->A1:control -// After layout pass +// After layout pass: // _MklPadWithConv2D(A, D, B, DMT/_0, DMT/_1, DMT/_2) // E:control->A1:control (only one control edge) -TEST_F(MklLayoutPassTest, ControlEdge_PadWithConv2D_Positive) { +TEST_F(MklLayoutPassTest, Output_ControlEdge_PadWithConv2D_Positive) { CHECK_EQ(kTensorOrdering, MklTfTensorOrdering::TENSORS_CONTIGUOUS); InitGraph( "node { name: 'A1' op: 'Input'}" @@ -2461,8 
+2461,8 @@ TEST_F(MklLayoutPassTest, ControlEdge_PadWithConv2D_Positive) { Node* e = FindNode("E"); const Edge* edge = graph_.AddControlEdge(c, a1); const Edge* edge_1 = graph_.AddControlEdge(e, a1); - ASSERT_TRUE(edge != nullptr); - ASSERT_TRUE(edge_1 != nullptr); + ASSERT_NE(edge, nullptr); + ASSERT_NE(edge_1, nullptr); EXPECT_EQ(DoMklLayoutOptimizationPass(), "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" -- GitLab From 3f803a9421fddf10a30745fc145d565d9737bd40 Mon Sep 17 00:00:00 2001 From: frreiss Date: Fri, 28 Sep 2018 17:18:01 -0700 Subject: [PATCH 0071/1078] Make add_n() handle a single IndexedSlices argument properly --- tensorflow/python/ops/math_ops.py | 4 +++- tensorflow/python/ops/math_ops_test.py | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index f57abf6704..ebdfa592d3 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2135,6 +2135,8 @@ def _as_indexed_slices_list(inputs, optimize=True): def add_n(inputs, name=None): """Adds all input tensors element-wise. + Converts `IndexedSlices` objects into dense tensors prior to adding. + Args: inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape and type. 
@@ -2157,7 +2159,7 @@ def add_n(inputs, name=None): if len(inputs) == 1: if isinstance(inputs[0], ops.IndexedSlices): - values = inputs[0].values + values = ops.convert_to_tensor(inputs[0]) else: values = inputs[0] if name: diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index f051850d92..cd9c89e519 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -359,6 +359,17 @@ class AddNTest(test_util.TensorFlowTestCase): [g.eval() for g in add_n_grad]) + def testIndexedSlices(self): + slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), + array_ops.constant([2]), array_ops.constant([2,2]) + slc_as_dense = np.array([[0, 0], [1, 2]]) + with self.test_session(use_gpu=True): + # add_n currently always converts IndexedSlices to dense + self.assertAllEqual(slc_as_dense, math_ops.add_n([slc]).eval()) + self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval()) + + + class DivAndModTest(test_util.TensorFlowTestCase): # TODO(aselle): Test more types before exposing new division operators. 
-- GitLab From 777e6a4e194e4cc141feb6b250702c0e4946ca2d Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Mon, 1 Oct 2018 13:51:36 +0800 Subject: [PATCH 0072/1078] Make colocations be compatible with DistributionStrategy in SyncReplicasOptimizer --- tensorflow/python/training/sync_replicas_optimizer.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/training/sync_replicas_optimizer.py b/tensorflow/python/training/sync_replicas_optimizer.py index 7afaa92699..99d2563fc6 100644 --- a/tensorflow/python/training/sync_replicas_optimizer.py +++ b/tensorflow/python/training/sync_replicas_optimizer.py @@ -27,6 +27,7 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import distribution_strategy_context from tensorflow.python.training import optimizer from tensorflow.python.training import queue_runner from tensorflow.python.training import session_manager @@ -245,7 +246,9 @@ class SyncReplicasOptimizer(optimizer.Optimizer): # local_anchor op will be placed on this worker task by default. local_anchor = control_flow_ops.no_op() # Colocating local_step variable prevents it being placed on the PS. 
- with ops.colocate_with(local_anchor): + distribution_strategy = ( + distribution_strategy_context.get_distribution_strategy()) + with distribution_strategy.colocate_vars_with(local_anchor): self._local_step = variable_scope.variable( initial_value=0, trainable=False, -- GitLab From ca7105c42182f6ef562d18a7843090a2ef458b83 Mon Sep 17 00:00:00 2001 From: frreiss Date: Mon, 1 Oct 2018 17:25:33 -0700 Subject: [PATCH 0073/1078] Oops, missing paren --- tensorflow/python/ops/math_ops_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index cd9c89e519..fbae792cd0 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -360,8 +360,8 @@ class AddNTest(test_util.TensorFlowTestCase): def testIndexedSlices(self): - slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), - array_ops.constant([2]), array_ops.constant([2,2]) + slc = ops.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), + array_ops.constant([1]), array_ops.constant([2,2])) slc_as_dense = np.array([[0, 0], [1, 2]]) with self.test_session(use_gpu=True): # add_n currently always converts IndexedSlices to dense -- GitLab From 2918d022954d4ce75e2b2ce4cd30c7f06d820444 Mon Sep 17 00:00:00 2001 From: frreiss Date: Mon, 1 Oct 2018 18:27:06 -0700 Subject: [PATCH 0074/1078] Remove extra blank line --- tensorflow/python/ops/math_ops_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index fbae792cd0..06abdcfc54 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -369,7 +369,6 @@ class AddNTest(test_util.TensorFlowTestCase): self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval()) - class DivAndModTest(test_util.TensorFlowTestCase): # TODO(aselle): Test more types before exposing new division operators. 
-- GitLab From f0cd69e047eb8f29538a27a41c17da20a1c59f2b Mon Sep 17 00:00:00 2001 From: himkt Date: Fri, 12 Oct 2018 13:17:58 +0900 Subject: [PATCH 0075/1078] Fix indentation in CRF1d --- tensorflow/contrib/crf/python/ops/crf.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/crf/python/ops/crf.py b/tensorflow/contrib/crf/python/ops/crf.py index 43bb43129b..7653b48e9c 100644 --- a/tensorflow/contrib/crf/python/ops/crf.py +++ b/tensorflow/contrib/crf/python/ops/crf.py @@ -38,12 +38,12 @@ tf_unary_scores, tf_sequence_lengths, tf_transition_params, _ = session.run( [unary_scores, sequence_lengths, transition_params, train_op]) for tf_unary_scores_, tf_sequence_length_ in zip(tf_unary_scores, tf_sequence_lengths): -# Remove padding. -tf_unary_scores_ = tf_unary_scores_[:tf_sequence_length_] + # Remove padding. + tf_unary_scores_ = tf_unary_scores_[:tf_sequence_length_] -# Compute the highest score and its tag sequence. -tf_viterbi_sequence, tf_viterbi_score = tf.contrib.crf.viterbi_decode( - tf_unary_scores_, tf_transition_params) + # Compute the highest score and its tag sequence. 
+ tf_viterbi_sequence, tf_viterbi_score = tf.contrib.crf.viterbi_decode( + tf_unary_scores_, tf_transition_params) """ from __future__ import absolute_import -- GitLab From aa9bb45cc8d534e5b1cec8613bea4b4e30f622de Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Fri, 12 Oct 2018 17:55:19 -0700 Subject: [PATCH 0076/1078] Explicitly set jdk8 in ci_parameterized_build.sh (#22956) PiperOrigin-RevId: 216946217 --- tensorflow/tools/ci_build/ci_parameterized_build.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index 489722c0e9..bc9cb4e9a1 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -149,8 +149,12 @@ BAZEL_TEST_FLAGS=""\ "--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}" BAZEL_BUILD_FLAGS="--keep_going" -BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS}" -BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS}" +# Explicitly set jdk8 since that's what's installed in our images. Note that +# bazel 0.16 and higher defaults to jdk9, which causes failures. 
See b/117634064 +BAZEL_JAVA_FLAGS="--java_toolchain=@bazel_tools//tools/jdk:toolchain_hostjdk8" + +BAZEL_CMD="bazel test ${BAZEL_TEST_FLAGS} ${BAZEL_JAVA_FLAGS}" +BAZEL_BUILD_ONLY_CMD="bazel build ${BAZEL_BUILD_FLAGS} ${BAZEL_JAVA_FLAGS}" BAZEL_CLEAN_CMD="bazel clean" PIP_CMD="${CI_BUILD_DIR}/builds/pip.sh" -- GitLab From 650172a574504223ec2bdb328ed7c985389313d7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 13 Oct 2018 20:30:26 +0000 Subject: [PATCH 0077/1078] Update test case for complex support of squared difference Signed-off-by: Yong Tang --- tensorflow/python/ops/math_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 533a00e737..62645230ee 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -217,7 +217,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase): for dtype in [np.complex64, np.complex128]: x = np.array([[1+3j, 2+2j, 3+1j], [4-1j, 5-2j, 6-3j]], dtype=dtype) y = np.array([-3+1j, -2+2j, -1+3j], dtype=dtype) - z = (x - y) * (x - y) + z = np.conj(x - y) * (x - y) with test_util.device(use_gpu=False): z_tf = self.evaluate(math_ops.squared_difference(x, y)) self.assertAllClose(z, z_tf) -- GitLab From 3a06e557619ebaa5437d1506af058b858806e9c7 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 13 Oct 2018 20:30:47 +0000 Subject: [PATCH 0078/1078] Update squared difference implementation for complex types. 
Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_ops.h | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 06918075a4..5afb97dc52 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -296,27 +296,31 @@ struct less_equal : std::binary_function { } }; -// Functor that enables composition of multiple Eigen functors. -template -struct scalar_compose_op { +// Functor that enables squared difference functor. +template +struct scalar_squared_difference_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator()(const Scalar& a, const Scalar& b) const { - return UnaryFunctor()(BinaryFunctor()(a, b)); + const Scalar v = scalar_difference_op()(a, b); + return scalar_product_op()(v, scalar_conjugate_op()(v)); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { - return UnaryFunctor().packetOp(BinaryFunctor().packetOp(a, b)); + const Packet v = scalar_difference_op().packetOp(a, b); + return scalar_product_op().packetOp(v, scalar_conjugate_op().packetOp(v)); } }; -template -struct functor_traits> { +template +struct functor_traits> { enum { - Cost = functor_traits::Cost + - functor_traits::Cost, - PacketAccess = functor_traits::PacketAccess && - functor_traits::PacketAccess + Cost = functor_traits>::Cost + + functor_traits>::Cost + + functor_traits>::Cost, + PacketAccess = functor_traits>::PacketAccess && + functor_traits>::PacketAccess && + functor_traits>::PacketAccess }; }; @@ -709,7 +713,7 @@ struct rint : base> {}; // pow(x, y) = x ^ y // maximum(x, y) = x > y ? x : y // minimum(x, y) = x < y ? 
x : y -// squared_difference(x, y) = (x - y) * (x - y) +// squared_difference(x, y) = conj(x - y) * (x - y) template struct add : base> { @@ -812,9 +816,7 @@ struct atan2 : base> {}; template struct squared_difference - : base, - Eigen::internal::scalar_difference_op>> {}; + : base> {}; template struct less : base, bool> {}; -- GitLab From 82642d91dbe6fbba87e6a582e396ca91df1f6440 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 13 Oct 2018 20:43:59 +0000 Subject: [PATCH 0079/1078] Fix `Experimental clang-format Check` error Signed-off-by: Yong Tang --- tensorflow/core/kernels/cwise_ops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 5afb97dc52..2682a25868 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -308,7 +308,8 @@ struct scalar_squared_difference_op { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& b) const { const Packet v = scalar_difference_op().packetOp(a, b); - return scalar_product_op().packetOp(v, scalar_conjugate_op().packetOp(v)); + return scalar_product_op().packetOp( + v, scalar_conjugate_op().packetOp(v)); } }; -- GitLab From 5ffddda5b707099fb62097aae00ba9403adedd13 Mon Sep 17 00:00:00 2001 From: frreiss Date: Mon, 15 Oct 2018 15:05:58 -0700 Subject: [PATCH 0080/1078] lint issues --- tensorflow/python/ops/math_ops_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index 06abdcfc54..0973e707a7 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -361,7 +361,7 @@ class AddNTest(test_util.TensorFlowTestCase): def testIndexedSlices(self): slc = ops.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]), - array_ops.constant([1]), array_ops.constant([2,2])) + array_ops.constant([1]), array_ops.constant([2, 2])) 
slc_as_dense = np.array([[0, 0], [1, 2]]) with self.test_session(use_gpu=True): # add_n currently always converts IndexedSlices to dense -- GitLab From 02987bd1e3459ba7ecfe689bb182ce8e692b70c4 Mon Sep 17 00:00:00 2001 From: "Yuan (Terry) Tang" Date: Tue, 16 Oct 2018 09:26:55 -0400 Subject: [PATCH 0081/1078] Add bullet points so Reduction values are clearer --- tensorflow/python/ops/losses/losses_impl.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 8a8a81ab5c..03cdc69ae5 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -39,13 +39,14 @@ class Reduction(object): """Types of loss reduction. Contains the following values: - `NONE`: Un-reduced weighted losses with the same shape as input. - `SUM`: Scalar sum of weighted losses. - `MEAN`: Scalar `SUM` divided by sum of weights. - `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses. - `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero + + * `NONE`: Un-reduced weighted losses with the same shape as input. + * `SUM`: Scalar sum of weighted losses. + * `MEAN`: Scalar `SUM` divided by sum of weights. + * `SUM_OVER_BATCH_SIZE`: Scalar `SUM` divided by number of elements in losses. + * `SUM_OVER_NONZERO_WEIGHTS`: Scalar `SUM` divided by number of non-zero weights. - `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. + * `SUM_BY_NONZERO_WEIGHTS`: Same as `SUM_OVER_NONZERO_WEIGHTS`. 
""" NONE = "none" -- GitLab From 7b081981131bf6da32065b8ecc3b8c5bd1280c4a Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Tue, 16 Oct 2018 10:14:23 -0700 Subject: [PATCH 0082/1078] Update version information in preparation for 1.12.0-rc1 (#23028) --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 07eeeb4f03..592dd5da16 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc0" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 3632ee2076..7593cfb58b 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. 
-_VERSION = '1.12.0-rc0' +_VERSION = '1.12.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From b379cecbdc4a9e6a0f8e468e0877888956e35dd5 Mon Sep 17 00:00:00 2001 From: annarev Date: Thu, 18 Oct 2018 14:04:48 -0700 Subject: [PATCH 0083/1078] Include .inc files for absl headers (#23081) --- tensorflow/tools/pip_package/setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 7593cfb58b..8c3bd4ac70 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -232,6 +232,8 @@ headers = (list(find_files('*.h', 'tensorflow/core')) + list(find_files('*', 'third_party/eigen3')) + list(find_files('*.h', 'tensorflow/include/external/com_google_absl')) + + list(find_files('*.inc', + 'tensorflow/include/external/com_google_absl')) + list(find_files('*', 'tensorflow/include/external/eigen_archive'))) setup( -- GitLab From 2aaf639173420403b804a7216f8f1c51027b6240 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 19 Oct 2018 09:00:12 -0700 Subject: [PATCH 0084/1078] Update relnotes with Ignite information --- RELEASE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE.md b/RELEASE.md index 58d918895c..dbe34db0bb 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -4,6 +4,7 @@ * Keras models can now be directly exported to the SavedModel format(`tf.contrib.saved_model.save_keras_model()`) and used with Tensorflow Serving. * Keras models now support evaluating with a `tf.data.Dataset`. * TensorFlow binaries are built with XLA support linked in by default. +* Ignite Dataset added to contrib/ignite that allows working with Apache Ignite.
## Bug Fixes and Other Changes -- GitLab From 878e98c1abd6cbd5bd044ddf8660c55e0c2a1634 Mon Sep 17 00:00:00 2001 From: Goldie Gadde Date: Fri, 19 Oct 2018 13:52:44 -0700 Subject: [PATCH 0085/1078] Update TF 1.12 version to 1.12-rc2 --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 592dd5da16..500ec8f97b 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "-rc2" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 8c3bd4ac70..b7eed56695 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.12.0-rc1' +_VERSION = '1.12.0-rc2' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From 4b4052c90e17c2c5bed45dc47c2d59d22f341b48 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Sat, 20 Oct 2018 16:17:55 -0700 Subject: [PATCH 0086/1078] Check for the presence of a Worker machine when reassigning hooks in distributed (#23116) training jobs. 
PiperOrigin-RevId: 217407558 --- tensorflow/python/estimator/estimator.py | 6 ++ tensorflow/python/estimator/estimator_test.py | 61 +++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index e6d82f0db7..8b957288c3 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -1424,7 +1424,13 @@ class Estimator(object): # evaluations. save_summary_steps = self._config.save_summary_steps log_step_count_steps = self._config.log_step_count_steps + + # Check existence of appropriate cluster spec fields, as well as master and + # worker nodes. As master also performs evaluation, summary writing must + # occur on a different node. The presence of a worker is also checked to + # prevent reassigning hooks for single-replica jobs with just a master node. if (self._config.cluster_spec and self._config.cluster_spec.jobs and + (run_config.TaskType.WORKER in self._config.cluster_spec.jobs) and (run_config.TaskType.MASTER in self._config.cluster_spec.jobs)): # Update config values to prevent the default hooks from being created on # the master or other workers. 
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 246dfb1a4b..c26b3e6509 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -1063,6 +1063,67 @@ class EstimatorTrainTest(test.TestCase): self.assertEqual(0, mock_sess.call_args[1]['save_summaries_steps']) self.assertIsNone(mock_sess.call_args[1]['log_step_count_steps']) + def test_master_hooks_single_replica(self): + tf_config = json.dumps({ + 'cluster': { + run_config.TaskType.MASTER: ['localhost:1234'] + }, + 'task': { + 'type': run_config.TaskType.MASTER, + 'index': 0 + } + }) + with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}): + est = estimator.Estimator( + model_fn=model_fn_global_step_incrementer, + config=run_config.RunConfig( + save_summary_steps=100, log_step_count_steps=200)) + + with test.mock.patch.object(training, + 'MonitoredTrainingSession') as mock_sess: + est.train(dummy_input_fn, steps=1) + self.assertFalse( + any( + isinstance(hook, basic_session_run_hooks.SummarySaverHook) + for hook in mock_sess.call_args[1]['hooks'])) + self.assertFalse( + any( + isinstance(hook, basic_session_run_hooks.StepCounterHook) + for hook in mock_sess.call_args[1]['hooks'])) + self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps']) + self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps']) + + def test_master_hooks_single_replica_with_ps(self): + tf_config = json.dumps({ + 'cluster': { + run_config.TaskType.MASTER: ['localhost:1234'], + run_config.TaskType.PS: ['localhost: 1235'], + }, + 'task': { + 'type': run_config.TaskType.MASTER, + 'index': 0 + } + }) + with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}): + est = estimator.Estimator( + model_fn=model_fn_global_step_incrementer, + config=run_config.RunConfig( + save_summary_steps=100, log_step_count_steps=200)) + + with test.mock.patch.object(training, + 'MonitoredTrainingSession') as 
mock_sess: + est.train(dummy_input_fn, steps=1) + self.assertFalse( + any( + isinstance(hook, basic_session_run_hooks.SummarySaverHook) + for hook in mock_sess.call_args[1]['hooks'])) + self.assertFalse( + any( + isinstance(hook, basic_session_run_hooks.StepCounterHook) + for hook in mock_sess.call_args[1]['hooks'])) + self.assertEqual(100, mock_sess.call_args[1]['save_summaries_steps']) + self.assertEqual(200, mock_sess.call_args[1]['log_step_count_steps']) + def _model_fn_with_eval_metric_ops(features, labels, mode, params): _, _ = features, labels -- GitLab From 20b53f7fe512a022ccbf97c71da4bd49f2fd5a04 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Sat, 20 Oct 2018 18:02:12 -0700 Subject: [PATCH 0087/1078] Fix triggering of asynchronous checkpoints. (#23138) PiperOrigin-RevId: 217570792 --- .../contrib/tpu/python/tpu/async_checkpoint.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py index 20b7ba0997..700598d2f4 100644 --- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py +++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py @@ -114,15 +114,12 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): return SessionRunArgs(self._global_step_tensor) def after_run(self, run_context, run_values): - stale_global_step = run_values.results - if self._timer.should_trigger_for_step(stale_global_step + - self._steps_per_run): - # get the real value after train op. 
- global_step = run_context.session.run(self._global_step_tensor) - if self._timer.should_trigger_for_step(global_step): - self._timer.update_last_triggered_step(global_step) - if self._save(run_context.session, global_step): - run_context.request_stop() + global_step = run_context.session.run(self._global_step_tensor) + if self._timer.should_trigger_for_step(global_step): + self._timer.update_last_triggered_step(global_step) + logging.info("Triggering checkpoint. %s", global_step) + if self._save(run_context.session, global_step): + run_context.request_stop() def end(self, session): if self._save_thread: -- GitLab From 238bf3f5a503227befb15ba3dd8a861eb30c6f5c Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Sat, 20 Oct 2018 18:22:35 -0700 Subject: [PATCH 0088/1078] Async checkpointing: Save the graph in a background thread. (#23139) PiperOrigin-RevId: 217747382 --- .../contrib/tpu/python/tpu/async_checkpoint.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py index 700598d2f4..78253d83fc 100644 --- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py +++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py @@ -69,6 +69,7 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): raise ValueError("You cannot provide both saver and scaffold.") self._saver = saver self._save_thread = None + self._write_graph_thread = None self._checkpoint_dir = checkpoint_dir self._save_path = os.path.join(checkpoint_dir, checkpoint_basename) self._scaffold = scaffold @@ -97,9 +98,13 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): # We do write graph and saver_def at the first call of before_run. # We cannot do this in begin, since we let other hooks to change graph and # add variables in begin. Graph is finalized after all begin calls. 
- training_util.write_graph( - ops.get_default_graph().as_graph_def(add_shapes=True), - self._checkpoint_dir, "graph.pbtxt") + def _write_graph_fn(): + training_util.write_graph( + ops.get_default_graph().as_graph_def(add_shapes=True), + self._checkpoint_dir, "graph.pbtxt") + self._write_graph_thread = threading.Thread(target=_write_graph_fn) + self._write_graph_thread.start() + saver_def = self._get_saver().saver_def if self._get_saver() else None graph = ops.get_default_graph() meta_graph_def = meta_graph.create_meta_graph_def( @@ -125,6 +130,9 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): if self._save_thread: logging.info("Waiting for any pending checkpoints to finish.") self._save_thread.join() + if self._write_graph_thread: + logging.info("Waiting for any pending write_graph to finish.") + self._write_graph_thread.join() last_step = session.run(self._global_step_tensor) -- GitLab From e40642fb03f96881c6e046e8b84606f29ab5d2b1 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Sat, 20 Oct 2018 18:43:02 -0700 Subject: [PATCH 0089/1078] Support fp16 types in ScatterNd GPU version (#23141) PiperOrigin-RevId: 217749577 --- tensorflow/core/kernels/scatter_nd_op.cc | 4 +- .../kernel_tests/scatter_nd_ops_test.py | 176 +++++++++--------- 2 files changed, 88 insertions(+), 92 deletions(-) diff --git a/tensorflow/core/kernels/scatter_nd_op.cc b/tensorflow/core/kernels/scatter_nd_op.cc index 2f8aede427..fd54c6d6d7 100644 --- a/tensorflow/core/kernels/scatter_nd_op.cc +++ b/tensorflow/core/kernels/scatter_nd_op.cc @@ -297,8 +297,7 @@ TF_CALL_bool(REGISTER_SCATTER_ND_CPU); REGISTER_SCATTER_ND_GPU(type); TF_CALL_int32(REGISTER_SCATTER_ND_ALL_GPU); -// TODO(b/66916790): Support half types in ScatterNd.
-TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ND_ALL_GPU); +TF_CALL_GPU_NUMBER_TYPES(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex64(REGISTER_SCATTER_ND_ALL_GPU); TF_CALL_complex128(REGISTER_SCATTER_ND_ALL_GPU); @@ -587,7 +586,6 @@ namespace functor { DECLARE_GPU_SPECS_INDEX(T, int64) TF_CALL_int32(DECLARE_GPU_SPECS); -// TODO(b/66916790): Support half types in ScatterNd. TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPECS); TF_CALL_complex64(DECLARE_GPU_SPECS); TF_CALL_complex128(DECLARE_GPU_SPECS); diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 4b92309e4d..49d83fb1d5 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -36,6 +36,9 @@ from tensorflow.python.ops import variables from tensorflow.python.platform import test +GRADIENT_TESTS_DTYPES = (dtypes.float16, dtypes.float32, dtypes.float64) + + def _AsType(v, vtype): return v.astype(vtype) if isinstance(v, np.ndarray) else vtype(v) @@ -144,9 +147,8 @@ class StatefulScatterNdTest(test.TestCase): self.assertAllClose(new, ref_var.eval()) def _VariableRankTests(self, np_scatter, tf_scatter): - for vtype in (np.int32, - np.float32, np.float64, - np.complex64, np.complex128): + for vtype in (np.int32, np.float16, np.float32, np.float64, np.complex64, + np.complex128): for itype in (np.int32, np.int64): self._VariableRankTest(np_scatter, tf_scatter, vtype, itype) @@ -223,7 +225,7 @@ class StatefulScatterNdTest(test.TestCase): # self._VariableRankTests(_NumpyDiv, state_ops.scatter_nd_div) def _ScatterRepeatIndicesTest(self, np_scatter, tf_scatter): - for vtype in (np.int32, np.float32, np.float64): + for vtype in (np.int32, np.float16, np.float32, np.float64): for itype in (np.int32, np.int64): self._VariableRankTest( np_scatter, tf_scatter, vtype, itype, repeat_indices=True) @@ -520,97 +522,93 @@ class ScatterNdTest(test.TestCase): self.scatter_nd(indices, updates, 
shape) def testGradientsRank2ElementUpdate(self): - indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32) - updates = constant_op.constant([1, 4], dtype=dtypes.float64) - shape = constant_op.constant([2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtypes.float64) - outputs = self.scatter_nd(indices, updates, shape, input_) - - grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtypes.float64) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) - expected_updates_grad = np.array([1, 4], dtype=np.float64) - expected_input_grad = np.array([[1, 2], [3, 4]], dtype=np.float64) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, updates_grad.eval()) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, input_grad.eval()) + for dtype in GRADIENT_TESTS_DTYPES: + indices = constant_op.constant([[0, 0], [1, 1]], dtype=dtypes.int32) + updates = constant_op.constant([1, 4], dtype=dtype) + shape = constant_op.constant([2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + outputs = self.scatter_nd(indices, updates, shape, input_) + + grad_vals = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) + updates_grad, input_grad = gradients_impl.gradients( + [outputs], [updates, input_], [grad_vals]) + expected_updates_grad = np.array([1, 4], dtype=dtype.as_numpy_dtype()) + expected_input_grad = np.array([[1, 2], [3, 4]], + dtype=dtype.as_numpy_dtype()) + with self.cached_session(): + self.assertAllEqual(expected_updates_grad, updates_grad.eval()) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, input_grad.eval()) def testGradientsRank2SliceUpdate(self): - indices = constant_op.constant([[1], [0]], dtype=dtypes.int32) - updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtypes.float64) - shape = constant_op.constant([2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtypes.float64) - 
outputs = self.scatter_nd(indices, updates, shape, input_) - - grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtypes.float64) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) - expected_updates_grad = np.array([[1, 2], [3, 4]], dtype=np.float64) - expected_input_grad = np.array([[3, 4], [1, 2]], dtype=np.float64) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, updates_grad.eval()) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, input_grad.eval()) + for dtype in GRADIENT_TESTS_DTYPES: + indices = constant_op.constant([[1], [0]], dtype=dtypes.int32) + updates = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) + shape = constant_op.constant([2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + outputs = self.scatter_nd(indices, updates, shape, input_) + + grad_vals = constant_op.constant([[3, 4], [1, 2]], dtype=dtype) + updates_grad, input_grad = gradients_impl.gradients( + [outputs], [updates, input_], [grad_vals]) + expected_updates_grad = np.array([[1, 2], [3, 4]], + dtype=dtype.as_numpy_dtype()) + expected_input_grad = np.array([[3, 4], [1, 2]], + dtype=dtype.as_numpy_dtype()) + with self.cached_session(): + self.assertAllEqual(expected_updates_grad, updates_grad.eval()) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, input_grad.eval()) def testGradientsRank3SliceUpdate(self): - indices = constant_op.constant( - [[[0, 1], [1, 0]], [[0, 0], [1, 1]]], dtype=dtypes.int32) - updates = constant_op.constant( - [[[5, 7], [2, 4]], [[1, 3], [6, 8]]], dtype=dtypes.float64) - shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtypes.float64) - outputs = self.scatter_nd(indices, updates, shape, input_) - - grad_vals = constant_op.constant( - [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=dtypes.float64) - updates_grad, input_grad = gradients_impl.gradients( - 
[outputs], [updates, input_], [grad_vals]) - expected_updates_grad = np.array( - [[[3, 4], [5, 6]], [[1, 2], [7, 8]]], dtype=np.float64) - expected_input_grad = np.array( - [[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype=np.float64) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, updates_grad.eval()) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, input_grad.eval()) + for dtype in GRADIENT_TESTS_DTYPES: + indices = constant_op.constant([[[0, 1], [1, 0]], [[0, 0], [1, 1]]], + dtype=dtypes.int32) + updates = constant_op.constant([[[5, 7], [2, 4]], [[1, 3], [6, 8]]], + dtype=dtype) + shape = constant_op.constant([2, 2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + outputs = self.scatter_nd(indices, updates, shape, input_) + + grad_vals = constant_op.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + dtype=dtype) + updates_grad, input_grad = gradients_impl.gradients( + [outputs], [updates, input_], [grad_vals]) + expected_updates_grad = np.array([[[3, 4], [5, 6]], [[1, 2], [7, 8]]], + dtype=dtype.as_numpy_dtype()) + expected_input_grad = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], + dtype=dtype.as_numpy_dtype()) + with self.cached_session(): + self.assertAllEqual(expected_updates_grad, updates_grad.eval()) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, input_grad.eval()) def testGradientsRank7SliceUpdate(self): - indices = constant_op.constant( - [[[ - [[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]], - [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]] - ]]], dtype=dtypes.int32) - updates = constant_op.constant( - [[[ - [[[[5, 6], [2, 4]]]], - [[[[1, 3], [6, 8]]]] - ]]], dtype=dtypes.float64) - shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32) - input_ = array_ops.zeros(shape, dtype=dtypes.float64) - outputs = self.scatter_nd(indices, updates, shape, input_) - - grad_vals = constant_op.constant( - [[[ - [[[[1, 2], [3, 4]]]], - [[[[5, 6], [7, 8]]]] - 
]]], dtype=dtypes.float64) - updates_grad, input_grad = gradients_impl.gradients( - [outputs], [updates, input_], [grad_vals]) - expected_updates_grad = np.array( - [[[ - [[[[3, 4], [5, 6]]]], - [[[[1, 2], [7, 8]]]] - ]]], dtype=np.float64) - expected_input_grad = np.array( - [[[ - [[[[1, 2], [3, 4]]]], - [[[[5, 6], [7, 8]]]] - ]]], dtype=np.float64) - with self.cached_session(): - self.assertAllEqual(expected_updates_grad, updates_grad.eval()) - if self.non_aliasing_add_test: - self.assertAllEqual(expected_input_grad, input_grad.eval()) + for dtype in GRADIENT_TESTS_DTYPES: + indices = constant_op.constant( + [[[[[[[0, 0, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0]]]], + [[[[0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 1]]]]]]], + dtype=dtypes.int32) + updates = constant_op.constant( + [[[[[[[5, 6], [2, 4]]]], [[[[1, 3], [6, 8]]]]]]], dtype=dtype) + shape = constant_op.constant([1, 1, 2, 1, 1, 2, 2], dtype=dtypes.int32) + input_ = array_ops.zeros(shape, dtype=dtype) + outputs = self.scatter_nd(indices, updates, shape, input_) + + grad_vals = constant_op.constant( + [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], dtype=dtype) + updates_grad, input_grad = gradients_impl.gradients( + [outputs], [updates, input_], [grad_vals]) + expected_updates_grad = np.array( + [[[[[[[3, 4], [5, 6]]]], [[[[1, 2], [7, 8]]]]]]], + dtype=dtype.as_numpy_dtype()) + expected_input_grad = np.array( + [[[[[[[1, 2], [3, 4]]]], [[[[5, 6], [7, 8]]]]]]], + dtype=dtype.as_numpy_dtype()) + with self.cached_session(): + self.assertAllEqual(expected_updates_grad, updates_grad.eval()) + if self.non_aliasing_add_test: + self.assertAllEqual(expected_input_grad, input_grad.eval()) def testScatterNdRepatedIndicesAdd(self): indices = array_ops.zeros([100000, 1], dtypes.int32) -- GitLab From 4f9d57337b71fe0ab3f25696db456e6a446ef54a Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Mon, 22 Oct 2018 10:53:25 +0800 Subject: [PATCH 0090/1078] Redundant transpose removal: transpose + conv2d + transpose -> conv2d. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 514 ++++++++++++++++-- tensorflow/core/graph/mkl_layout_pass_test.cc | 295 ++++++++++ 2 files changed, 771 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 69735aac02..233c5ab39b 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -25,6 +25,8 @@ limitations under the License. #include #include #include +#include + #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/optimization_registry.h" #include "tensorflow/core/framework/node_def_util.h" @@ -310,6 +312,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.slice = "Slice"; csinfo_.softmax = "Softmax"; csinfo_.split = "Split"; + csinfo_.transpose = "Transpose"; // Element-wise ops. Ensure you also add any new ops to IsOpElementWise // in the MklUtil.h (IsMklElementWiseOp method) to ensure that the // MklInputConversion op is added before it. @@ -508,6 +511,33 @@ class MklLayoutRewritePass : public GraphOptimizationPass { minfo_.push_back({csinfo_.conv2d_grad_filter, csinfo_.bias_add_grad, csinfo_.conv2d_grad_filter_with_bias, GetConv2DBackpropFilterOrBiasAddGrad}); + + // + // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D (NHWC) + Transpose (NHWC -> + // NCHW)" => "Conv2D (NCHW)". Such patterns occur frequently in Keras. + // Note: we use the term "merge" for combining (exactly) 2 nodes into one, while "fusion" is
+ // + + // Transpose + Conv2d + Transpose: + std::vector transpose_to_nhwc = { NCHW::dim::N, NCHW::dim::H, NCHW::dim::W, NCHW::dim::C }; + std::vector transpose_to_nchw = { NHWC::dim::N, NHWC::dim::C, NHWC::dim::H, NHWC::dim::W }; + auto CheckForTransposeToNHWC = + std::bind(CheckForTranspose, std::placeholders::_1, transpose_to_nhwc); + auto CheckForConv2dOp = + std::bind(CheckForMklOp, std::placeholders::_1, csinfo_.conv2d); + auto CheckForTransposeToNCHW = + std::bind(CheckForTranspose, std::placeholders::_1, transpose_to_nchw); + auto FuseConv2D = + std::bind(FuseTransposeMklOpTranspose, std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3, "NCHW"); + finfo_.push_back({ + "transpose-elimination for Conv2D", { + CheckForTransposeToNHWC, CheckForConv2dOp, CheckForTransposeToNCHW + }, + // CheckForMklOp + FuseConv2D, CopyAttrsConv + }); } // Standard interface to run pass @@ -530,7 +560,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string name; // Original name of op of the node in the graph string new_name; // New name of the op of the node in the graph // A function handler to copy attributes from an old node to a new node. - std::function copy_attrs; + std::function copy_attrs; // A rule under which to rewrite this node std::function rewrite_rule; } RewriteInfo; @@ -560,6 +590,42 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::function get_node_to_be_merged; } MergeInfo; + // structure to specify information used in node fusion of 2+ operators + typedef struct { + std::string pattern_name; // name to describe this pattern, such as + // "Transpose_Mklop_Transpose". + std::vector > + node_checkers; // extra restriction checker for these ops + std::function< + Status(std::unique_ptr *, std::vector &, + std::function)> + fuse_func; + std::function copy_attrs; + } FusionInfo; + + // + // dimension indices for 2D tensor. 
+ // + struct NCHW { + enum dim { N = 0, C = 1, H = 2, W = 3 }; + }; + + struct NHWC { + enum dim { N = 0, H = 1, W = 2, C = 3 }; + }; + + + // + // dimension indices for 3D tensor. + // + struct NCDHW { + enum dim { N = 0, C = 1, D = 2, H = 3, W = 4 }; + }; + + struct NDHWC { + enum dim { N = 0, D = 1, H = 2, W = 3, C = 4 }; + }; + /// Structure to store all constant strings /// NOTE: names are alphabetically sorted. typedef struct { @@ -619,6 +685,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string requantize; string tanh; string tanh_grad; + string transpose; string reshape; string slice; string softmax; @@ -637,6 +704,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { /// Maintain info about nodes to be merged std::vector minfo_; + /// Maintain info about nodes to be fused + std::vector finfo_; + /// Maintain structure of constant strings static ConstStringsInfo csinfo_; @@ -815,6 +885,121 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return n; } + // Return a node that can be fused with input node 'n' + // + // @return tuple. If we can find such nodes, the first + // element of the tuple is true. Otherwise, it's false. + std::tuple, const MklLayoutRewritePass::FusionInfo> + CheckForNodeFusion(Node *n) const; + + // Fuse nodes in the vector "nodes" + Status FuseNode(std::unique_ptr *g, std::vector &nodes, + const MklLayoutRewritePass::FusionInfo fi); + + static Status FuseTransposeMklOpTranspose( + std::unique_ptr *g, std::vector &nodes, + std::function copy_attrs, + string data_format); + + static bool CheckForTranspose(const Node *node, std::vector perm) { + // + // Check the node to see if it's a "Transpose" + // + if (node->type_string() != "Transpose") + return false; + + // + // Check if it has an outgoing control edge. If true, this is a training graph. + // Currently we focus on inference and do no fusion in training.
+ // + for (const Edge *e : node->out_edges()) { + if (e->IsControlEdge()) { + return false; + } + } + + // + // If "Transpose" has input control edges, don't fuse on it. + // + for (const Edge *e : node->in_edges()) { + if (e->IsControlEdge()) { + return false; + } + } + + // + // If "Transpose" has multiple output data edges, also don't fuse it. + // + if (node->num_outputs() > 1 || node->out_edges().size() > 1) + return false; + + // Check "perm" attribute, make sure it's what we want. + // + for (const Edge *e : node->in_edges()) { + if (!e->IsControlEdge()) { + const Node *perm_node = e->src(); + + const int kPermTensorIndex = 1; + if (perm_node->type_string() == "Const" && e->dst_input() == kPermTensorIndex) { + // we find the "perm" node, now try to retrieve its value. + const TensorProto *proto = nullptr; + CHECK_EQ(GetNodeAttr(perm_node->def(), "value", &proto).ok(), true); + + DataType type; + GetNodeAttr(perm_node->def(), "dtype", &type); + + // + // Here we directly access the "tensor_content", rather than + // "int_val". This is because we find "int_val" is + // not set properly under some circumstances.
+ // + if (type == DT_INT32) { + const int type_size = 4; + const int *tensor_content = reinterpret_cast(proto->tensor_content().c_str()); + const int tensor_content_size = proto->tensor_content().size() / type_size; + + std::vector perm_value(tensor_content, tensor_content + tensor_content_size); + + return perm_value == perm; + + } else if (type == DT_INT64) { + const int type_size = 8; + const long *tensor_content = reinterpret_cast(proto->tensor_content().c_str()); + const int tensor_content_size = proto->tensor_content().size() / type_size; + + std::vector perm_value(tensor_content, tensor_content + tensor_content_size); + std::vector long_perm(perm.cbegin(), perm.cend()); + + return perm_value == long_perm; + + } + + return false; + } + } + } + + return false; + } + + static bool CheckForMklOp(const Node *node, string name = "") { + if (!name.empty() && node->type_string() != name) { + return false; + } + + // if mklop has multiple outputs, don't fuse it. + if (node->num_outputs() > 1) + return false; + + if (node->out_edges().size() > 1) + return false; + + DataType T; + TF_CHECK_OK(GetNodeAttr(node->def(), "T", &T)); + return mkl_op_registry::IsMklOp( + mkl_op_registry::GetMklOpName(node->type_string()), T); + } + // Check if the node 'n' has any applicable rewrite rule // We check for 2 scenarios for rewrite. // @@ -1070,22 +1255,39 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. // NOTE: names are alphabetically sorted. 
- static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsConv(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsQuantizedConv2D(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsQuantizedConcat(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsReshape(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsRequantize(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsSlice(const Node* orig_node, NodeBuilder* nb); - static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb); + static void CopyAttrsAddN(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsBiasAddGrad(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsConcat(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsConcatV2(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsConv(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsDataType(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsFusedBatchNorm(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsLRN(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + 
static void CopyAttrsPooling(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb, + bool change_format = false); + static void CopyAttrsQuantizedConv2D(const Node* orig_node, NodeBuilder* nb, + bool change_format = false); + static void CopyAttrsQuantizedConcat(const Node* orig_node, NodeBuilder* nb, + bool change_format = false); + static void CopyAttrsReshape(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + static void CopyAttrsRequantize(const Node* orig_node, NodeBuilder* nb, + bool change_format = false); + static void CopyAttrsSlice(const Node* orig_node, NodeBuilder* nb, + bool change_format = false); + static void CopyAttrsSplit(const Node *orig_node, NodeBuilder *nb, + bool change_format = false); + // Generate a graph node in graph 'g' representing a dummy Mkl tensor node, // using node for original node 'orig_node' and return it in '*out'. @@ -1586,8 +1788,8 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded( // Op-specific functions to copy attributes from old node to new node ////////////////////////////////////////////////////////////////////////// -void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, - NodeBuilder* nb) { +void MklLayoutRewritePass::CopyAttrsConv(const Node *orig_node, NodeBuilder *nb, + bool change_format) { DataType T; string data_format; string padding; @@ -1599,18 +1801,72 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); - TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); // Add attributes to new node. 
nb->Attr("T", T); - nb->Attr("strides", strides); - nb->Attr("dilations", dilations); nb->Attr("padding", padding); - nb->Attr("data_format", data_format); + + if (!change_format) { + nb->Attr("strides", strides); + nb->Attr("dilations", dilations); + + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); + nb->Attr("data_format", data_format); + } else { + std::vector new_strides; + std::vector new_dilations; + if (strides.size() == 5) { + // + // "strides" and "dilations" also need to be changed according to "data_format", + // in this case, is "NDHWC" to "NCDHW". + // + + new_strides = { + strides[NDHWC::dim::N], + strides[NDHWC::dim::C], + strides[NDHWC::dim::D], + strides[NDHWC::dim::H], + strides[NDHWC::dim::W] + }; + nb->Attr("strides", new_strides); + + new_dilations = { + dilations[NDHWC::dim::N], + dilations[NDHWC::dim::C], + dilations[NDHWC::dim::D], + dilations[NDHWC::dim::H], + dilations[NDHWC::dim::W] + }; + nb->Attr("dilations", new_dilations); + + } else { + // + // "strides" and "dilations" also need to be changed according to "data_format", + // in this case, is "NHWC" to "NCHW". 
+ // + + new_strides = { + strides[NHWC::dim::N], + strides[NHWC::dim::C], + strides[NHWC::dim::H], + strides[NHWC::dim::W] + }; + nb->Attr("strides", new_strides); + + new_dilations = { + dilations[NHWC::dim::N], + dilations[NHWC::dim::C], + dilations[NHWC::dim::H], + dilations[NHWC::dim::W] + }; + nb->Attr("dilations", new_dilations); + } + } } void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; int N; @@ -1624,7 +1880,8 @@ void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; string data_format; std::vector strides; @@ -1641,7 +1898,8 @@ void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; int depth_radius; float bias; @@ -1664,7 +1922,8 @@ void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; string data_format; string padding; @@ -1686,7 +1945,8 @@ void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; // Get all attributes from old node. 
@@ -1697,7 +1957,8 @@ void MklLayoutRewritePass::CopyAttrsDataType(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsQuantizedPooling(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; string data_format; string padding; @@ -1717,7 +1978,8 @@ void MklLayoutRewritePass::CopyAttrsQuantizedPooling(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsQuantizedConv2D(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType Tinput, Tfilter, out_type; string padding; string data_format("NHWC"); @@ -1747,7 +2009,8 @@ void MklLayoutRewritePass::CopyAttrsQuantizedConv2D(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsRequantize(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType Tinput, out_type; // Get all attributes from old node. @@ -1760,7 +2023,8 @@ void MklLayoutRewritePass::CopyAttrsRequantize(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; DataType Tshape; @@ -1773,7 +2037,8 @@ void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; DataType Index; @@ -1786,7 +2051,8 @@ void MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; string data_format; int num_split; @@ -1803,7 +2069,8 @@ void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; int N; @@ -1817,7 +2084,8 @@ void MklLayoutRewritePass::CopyAttrsConcat(const Node* orig_node, } void 
MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; int N; DataType tidx; @@ -1834,7 +2102,8 @@ void MklLayoutRewritePass::CopyAttrsConcatV2(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsFusedBatchNorm(const Node* orig_node, - NodeBuilder* nb) { + NodeBuilder* nb, + bool change_format) { DataType T; float epsilon; string data_format; @@ -2231,7 +2500,8 @@ Status MklLayoutRewritePass::RewriteNode(std::unique_ptr* g, return s; } - ri->copy_attrs(const_cast(orig_node), &nb); + const bool kPartialCopyAttrs = false; + ri->copy_attrs(const_cast(orig_node), &nb, kPartialCopyAttrs); // Set the Mkl layer label for this op. if (DataTypeIsQuantized(orig_node->input_type(0)) || @@ -2391,6 +2661,151 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { return nullptr; } +////////////////////////////////////////////////////////////////////////// +// Helper functions for node fusion +////////////////////////////////////////////////////////////////////////// +Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( + std::unique_ptr *g, std::vector &nodes, + std::function copy_attrs, + string data_format) { + Node *transpose_to_nhwc = nodes[0]; + Node *mklop = nodes[1]; + Node *transpose_to_nchw = nodes[2]; + + const int transpose_nhwc_num_inputs = transpose_to_nhwc->num_inputs(); + gtl::InlinedVector transpose_nhwc_control_edges; + gtl::InlinedVector, 4> transpose_nhwc_in( + transpose_nhwc_num_inputs); + FillInputs(transpose_to_nhwc, &transpose_nhwc_control_edges, + &transpose_nhwc_in); + + const int mklop_num_inputs = mklop->num_inputs(); + gtl::InlinedVector mklop_control_edges; + gtl::InlinedVector, 4> mklop_in(mklop_num_inputs); + FillInputs(mklop, &mklop_control_edges, &mklop_in); + + const int transpose_nchw_num_inputs = transpose_to_nchw->num_inputs(); + gtl::InlinedVector transpose_nchw_control_edges; + gtl::InlinedVector, 4> transpose_nchw_in( + 
transpose_nchw_num_inputs); + FillInputs(transpose_to_nhwc, &transpose_nchw_control_edges, + &transpose_nchw_in); + + // We will use the node name of Conv2d as the name of new node + // Build new node. We use same name as original node, but change the op + // name. + NodeBuilder nb(mklop->name(), mklop->type_string()); + + for (int i = 0; i < mklop_num_inputs; i++) { + if (mklop_in[i].first == transpose_to_nhwc) { + // Fill "x": + nb.Input(transpose_nhwc_in[0].first, transpose_nhwc_in[0].second); + } else { + // Fill inputs other than "x": + nb.Input(mklop_in[i].first, mklop_in[i].second); + } + } + + copy_attrs(const_cast(mklop), &nb, true); + nb.Attr("data_format", data_format); + + // Copy the device assigned to old node to new node. + nb.Device(mklop->def().device()); + + // Create node. + Node *new_node; + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); + CHECK_NOTNULL(new_node); + + // Fill outputs. + for (const Edge *e : transpose_to_nchw->out_edges()) { + if (!e->IsControlEdge()) { + const int kConv2DWithBiasOutputSlot = 0; + CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot, e->dst(), + e->dst_input())); + } + } + + // Copy device assigned to old node to new node. 
+ new_node->set_assigned_device_name(mklop->assigned_device_name()); + + (*g)->RemoveNode(transpose_to_nhwc); + (*g)->RemoveNode(mklop); + (*g)->RemoveNode(transpose_to_nchw); + + return Status::OK(); +} + +Status +MklLayoutRewritePass::FuseNode(std::unique_ptr *g, + std::vector &nodes, + const MklLayoutRewritePass::FusionInfo fi) { + return fi.fuse_func(g, nodes, fi.copy_attrs); +} + +std::tuple, const MklLayoutRewritePass::FusionInfo> +MklLayoutRewritePass::CheckForNodeFusion(Node *a) const { + bool found_pattern = false; + std::vector nodes; + const FusionInfo *fi_ptr = nullptr; + + for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { + assert(fi->ops.size() == fi->node_checkers.size()); + nodes.clear(); + fi_ptr = &*fi; + // + // Make sure node "a" and its succeding nodes (b, c ...), match the pattern + // defined in fusion info (ops[0], ops[1], ...), + // aka. "a->b->c" matches "op1->op2->op3" + // + + // Initialize "current_node" as node "a". + Node *current_node = a; + for (auto node_index = 0; node_index < fi->node_checkers.size(); + ++node_index) { + // Make sure current node meet the requirement of corresponding node + // checker. + auto check_node = fi->node_checkers[node_index]; + if (current_node == nullptr || + (check_node && check_node(current_node) == false)) { + found_pattern = false; + nodes.clear(); + break; + } + + // Add current_node to "fusion_nodes": + nodes.push_back(current_node); + + // If current node is not the last node we want to check, check next node. + if (node_index != fi->node_checkers.size() - 1) { + // Find current node's direct descendant, which will be used in next + // iteration. 
+ auto check_next_node = fi->node_checkers[node_index + 1]; + for (const Edge *e : current_node->out_edges()) { + if (!e->IsControlEdge()) { + Node *candidate_node = e->dst(); + + if (check_next_node(candidate_node) == false) { + current_node = nullptr; + } else { + current_node = candidate_node; + break; + } + } + } + } else { + found_pattern = true; + } + } + + if (found_pattern == true) { + break; + } + } + + return make_tuple(found_pattern, nodes, *fi_ptr); +} + /////////////////////////////////////////////////////////////////////////////// // Post-rewrite Mkl metadata fixup pass /////////////////////////////////////////////////////////////////////////////// @@ -2516,6 +2931,29 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { DumpGraph("After running MklLayoutRewritePass(NodeMerge)", &**g); + order.clear(); + GetReversePostOrder(**g, &order); // This will give us topological sort. + for (Node *n : order) { + // If node is not an op or it cannot run on CPU device, then skip. + if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { + continue; + } + + auto check_result = CheckForNodeFusion(n); + bool found_pattern = std::get<0>(check_result); + std::vector nodes = std::get<1>(check_result); + const FusionInfo fi = std::get<2>(check_result); + + // if "found_pattern" is true, we can do the fusion. + if (found_pattern) { + if (FuseNode(g, nodes, fi) == Status::OK()) { + result = true; + } + } + } + + DumpGraph("After running MklLayoutRewritePass(NodeFusion)", &**g); + order.clear(); GetReversePostOrder(**g, &order); // This will give us topological sort. 
for (Node* n : order) { diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 7e2d1f7878..b09ef3b970 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -455,6 +455,301 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) { "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1"); } +TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Positive) { + InitGraph( + "node { name: 'Input0' op: 'Input'}" + "node { name: 'Input1' op: 'Input'}" + "node { name: 'Const0' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: '\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" + " }" + " }" + " }" + "}" + "node { name: 'Const1' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: '\\000\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000\\002\\000\\000\\000'" + " }" + " }" + " }" + "}" + "node { \ + name: 'Transpose0' \ + op: 'Transpose' \ + input: 'Input0' \ + input: 'Const0' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + key: 'Tperm' \ + value { \ + type: DT_INT32 \ + } \ + } \ + }" + "node { \ + name: 'Conv2D' \ + op: 'Conv2D' \ + input: 'Transpose0' \ + input: 'Input1' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + key: 'data_format' \ + value { \ + s: 'NHWC' \ + } \ + } \ + attr { \ + key: 'dilations' \ + value { \ + list { \ + i: 1 \ + i: 1 \ + i: 1 \ + i: 1 \ + } \ + } \ + } \ + attr { \ + key: 'padding' \ + value { \ + s: 'SAME' \ 
+ } \ + } \ + attr { \ + key: 'strides' \ + value { \ + list { \ + i: 1 \ + i: 1 \ + i: 1 \ + i: 1 \ + } \ + } \ + } \ + attr { \ + key: 'use_cudnn_on_gpu' \ + value { \ + b: true \ + } \ + } \ + }" + "node { \ + name: 'Transpose1' \ + op: 'Transpose' \ + input: 'Conv2D' \ + input: 'Const1' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + key: 'Tperm' \ + value { \ + type: DT_INT32 \ + } \ + } \ + }" + "node { name: 'Relu' op: 'Relu'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['Transpose1'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "Const0(Const);Const1(Const);" + "Conv2D(_MklConv2D);DMT/_0(Const);DMT/_1(Const);Input0(Input);" + "Input1(Input);Relu(_MklRelu)|Conv2D->Relu;Conv2D:2->Relu:1;DMT/_0->Conv2D:2;DMT/_1->Conv2D:3;Input0->Conv2D;" + "Input0:control->DMT/_0:control;Input0:control->DMT/_1:control;Input1->Conv2D:1"); +} + +TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Negative) { + InitGraph( + "node { name: 'Input0' op: 'Input'}" + "node { name: 'Input1' op: 'Input'}" + "node { name: 'Const0' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: '\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" + " }" + " }" + " }" + "}" + "node { name: 'Const1' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: '\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" + " }" + " }" + " }" + "}" + "node { \ + name: 'Transpose0' \ + op: 'Transpose' \ + input: 'Input0' \ + input: 'Const0' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + 
key: 'Tperm' \ + value { \ + type: DT_INT32 \ + } \ + } \ + }" + "node { \ + name: 'Conv2D' \ + op: 'Conv2D' \ + input: 'Transpose0' \ + input: 'Input1' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + key: 'data_format' \ + value { \ + s: 'NHWC' \ + } \ + } \ + attr { \ + key: 'dilations' \ + value { \ + list { \ + i: 1 \ + i: 1 \ + i: 1 \ + i: 1 \ + } \ + } \ + } \ + attr { \ + key: 'padding' \ + value { \ + s: 'SAME' \ + } \ + } \ + attr { \ + key: 'strides' \ + value { \ + list { \ + i: 1 \ + i: 1 \ + i: 1 \ + i: 1 \ + } \ + } \ + } \ + attr { \ + key: 'use_cudnn_on_gpu' \ + value { \ + b: true \ + } \ + } \ + }" + "node { \ + name: 'Transpose1' \ + op: 'Transpose' \ + input: 'Conv2D' \ + input: 'Const1' \ + attr { \ + key: 'T' \ + value { \ + type: DT_FLOAT \ + } \ + } \ + attr { \ + key: 'Tperm' \ + value { \ + type: DT_INT32 \ + } \ + } \ + }" + "node { name: 'Relu' op: 'Relu'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['Transpose1'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "Const0(Const);Const1(Const);" + "Conv2D(_MklConv2D);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);" + "Input0(Input);Input1(Input);Relu(_MklRelu);" + "Transpose0(Transpose);Transpose1(Transpose)|Const0->Transpose0:1;Const1->Transpose1:1;" + "Conv2D->Transpose1;DMT/_0->Conv2D:2;DMT/_1->Conv2D:3;DMT/_2->Relu:1;Input0->Transpose0;" + "Input1->Conv2D:1;Transpose0->Conv2D;Transpose0:control->DMT/_0:control;" + "Transpose0:control->DMT/_1:control;Transpose1->Relu;Transpose1:control->DMT/_2:control"); +} + ///////////////////////////////////////////////////////////////////// // Unit tests related to rewriting node to Mkl node ///////////////////////////////////////////////////////////////////// -- GitLab From 8d4805d8a3d2b2b48c69d9cd7f358996cb9a87a1 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 22 Oct 2018 11:09:58 -0700 Subject: [PATCH 0091/1078] cleaning up MKL ML code in LRN kernel --- tensorflow/core/kernels/mkl_lrn_op.cc | 680 
+------------------------- tensorflow/core/ops/nn_ops.cc | 8 - 2 files changed, 2 insertions(+), 686 deletions(-) diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 22ff4cd80f..4d46abb0a4 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -22,6 +22,7 @@ limitations under the License. #define EIGEN_USE_THREADS #include +#include "mkldnn.hpp" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" @@ -29,25 +30,18 @@ limitations under the License. #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/util/work_sharder.h" #endif -#ifndef INTEL_MKL_ML_ONLY -#include "mkldnn.hpp" using mkldnn::lrn_across_channels; using mkldnn::lrn_backward; using mkldnn::lrn_forward; using mkldnn::prop_kind; using mkldnn::stream; -#else -#include "mkl_dnn.h" -#include "mkl_dnn_types.h" -#endif - -#include "tensorflow/core/util/mkl_util.h" namespace tensorflow { @@ -69,672 +63,6 @@ void GetBandMatrix(int depth, int depth_radius, } // namespace -#ifdef INTEL_MKL_ML_ONLY - -template -class MklLRNOp : public OpKernel { - public: - ~MklLRNOp() {} - - explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) { - int64 depth_radius64; - OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES( - context, - FastBoundsCheck(depth_radius64, std::numeric_limits::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); - depth_radius_ = static_cast(depth_radius64); - - OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); - OP_REQUIRES_OK(context, context->GetAttr("alpha", 
&alpha_)); - OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); - workspace_enabled_ = false; - OP_REQUIRES_OK(context, - context->GetAttr("workspace_enabled", &workspace_enabled_)); - } - - void Compute(OpKernelContext* context) override { - MklLRNOpContext mkl_context; - - const Tensor& input = MklGetInput(context, 0); - GetMklShape(context, 0, &mkl_context.input_shape); - bool input_in_mkl_format = mkl_context.input_shape.IsMklTensor(); - - // Sanity checks - mkl_context.in_dims = input_in_mkl_format - ? mkl_context.input_shape.GetDimension() - : input.dims(); - OP_REQUIRES(context, mkl_context.in_dims == 4, - errors::InvalidArgument("input must be 4-dimensional")); - OP_REQUIRES( - context, - FastBoundsCheck(input.NumElements(), std::numeric_limits::max()), - errors::InvalidArgument("argument to LRN too large")); - - if (!input_in_mkl_format) { - mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_, - beta_, input); - return; - } - - if (input_in_mkl_format) { - // MKL supports normalization over channel dimension only - if (mkl_context.input_shape.tf_dim_idx(mkl_context.in_dims - 1) == - MklDims::C) { - mkl_context.lt_input = - static_cast(mkl_context.input_shape.GetCurLayout()); - workspace_enabled_ = true; - } else { - Tensor converted_tensor = - ConvertMklToTF(context, input, mkl_context.input_shape); - mkl_context.MklDefaultToEigen(context, depth_radius_, bias_, alpha_, - beta_, converted_tensor); - return; - } - } - - int kernel_size = 2 * depth_radius_ + 1; - - CHECK_EQ(dnnLRNCreateForward_F32( - &mkl_context.lrn_fwd, NULL, mkl_context.lt_input, kernel_size, - static_cast(alpha_ * kernel_size), beta_, bias_), - E_SUCCESS); - - // Allocate output tensor and shape - Tensor* output = nullptr; - Tensor* workspace = nullptr; - - // Convert Inputs if needed - Tensor mkl_tmp_input_buf_tensor; - mkl_context.MklPrepareLRNInputs(context, &mkl_tmp_input_buf_tensor); - - // Allocate Layer Outputs - mkl_context.MklAllocateOutputs(context, 
&output, &workspace, - workspace_enabled_); - - Tensor mkl_tmp_workspace_buf_tensor; - mkl_context.MklPrepareLRNOutputs(context, output, workspace, - &mkl_tmp_workspace_buf_tensor, - workspace_enabled_); - - // Execute LRN. - CHECK_EQ(dnnExecute_F32(mkl_context.lrn_fwd, mkl_context.lrn_res), - E_SUCCESS); - - // Release MKL resources. - mkl_context.MklCleanup(); - } - - private: - typedef struct { - size_t in_dims; - size_t in_sizes[4]; - size_t in_strides[4]; - size_t out_sizes[4]; - size_t out_strides[4]; - MklShape input_shape; - dnnPrimitive_t lrn_fwd = nullptr; - dnnPrimitive_t convert_input = nullptr; - dnnLayout_t lt_input = nullptr; - dnnLayout_t lt_internal_input = nullptr; - dnnLayout_t lt_internal_workspace = nullptr; - dnnLayout_t lt_internal_output = nullptr; - void* lrn_res[dnnResourceNumber]; - - // Convert Inputs if needed - void MklPrepareLRNInputs(OpKernelContext* context, - Tensor* mkl_tmp_input_buf_tensor) { - const Tensor& input = MklGetInput(context, 0); - void* mkl_buf_input = - const_cast(static_cast(input.flat().data())); - - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_internal_input, lrn_fwd, - dnnResourceSrc), - E_SUCCESS); - - void* mkl_buf_convert_input = nullptr; - bool mkl_convert_input = false; - mkl_convert_input = !dnnLayoutCompare_F32(lt_internal_input, lt_input); - - if (mkl_convert_input) { - CHECK_EQ(dnnConversionCreate_F32(&convert_input, lt_input, - lt_internal_input), - E_SUCCESS); - AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_internal_input, - &mkl_buf_convert_input); - CHECK_EQ(dnnConversionExecute_F32(convert_input, mkl_buf_input, - mkl_buf_convert_input), - E_SUCCESS); - dnnDelete_F32(convert_input); - } - - lrn_res[dnnResourceSrc] = - (mkl_convert_input) ? 
mkl_buf_convert_input : mkl_buf_input; - } - - // Allocate Layer Outputs - void MklAllocateOutputs(OpKernelContext* context, Tensor** output, - Tensor** workspace, bool workspace_enabled_) { - TensorShape mkl_output_tf_shape; /* First tensor */ - MklShape mkl_output_mkl_shape; /* Second tensor */ - - mkl_output_mkl_shape.SetMklTensor(true); - mkl_output_mkl_shape.SetMklLayout(lrn_fwd, dnnResourceDst); - mkl_output_mkl_shape.SetTfLayout(in_dims, input_shape.GetSizes(), - input_shape.GetStrides()); - mkl_output_mkl_shape.SetTfDimOrder(in_dims, - input_shape.GetTfToMklDimMap()); - mkl_output_tf_shape.AddDim( - dnnLayoutGetMemorySize_F32( - static_cast(mkl_output_mkl_shape.GetMklLayout())) / - sizeof(T)); - AllocateOutputSetMklShape(context, 0, output, - mkl_output_tf_shape /* First tensor */, - mkl_output_mkl_shape /* Second Tensor */); - - if (workspace_enabled_) { - TensorShape mkl_workspace_tf_shape; /* First tensor */ - MklShape mkl_workspace_mkl_shape; /* Second tensor */ - mkl_workspace_mkl_shape.SetMklTensor(false); - mkl_workspace_mkl_shape.SetMklLayout(lrn_fwd, dnnResourceWorkspace); - // Assumes workspace has same TF layout and TF dim order as input - mkl_workspace_mkl_shape.SetTfLayout(in_dims, input_shape.GetSizes(), - input_shape.GetStrides()); - mkl_workspace_mkl_shape.SetTfDimOrder(in_dims, - input_shape.GetTfToMklDimMap()); - mkl_workspace_tf_shape.AddDim( - dnnLayoutGetMemorySize_F32(static_cast( - mkl_workspace_mkl_shape.GetMklLayout())) / - sizeof(T)); - AllocateOutputSetMklShape(context, 1, workspace, - mkl_workspace_tf_shape /* First tensor */, - mkl_workspace_mkl_shape /* Second Tensor */); - } - } - - void MklPrepareLRNOutputs(OpKernelContext* context, Tensor* output, - Tensor* workspace, - Tensor* mkl_tmp_workspace_buf_tensor, - bool workspace_enabled_) { - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_internal_workspace, lrn_fwd, - dnnResourceWorkspace), - E_SUCCESS); - - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_internal_output, lrn_fwd, - 
dnnResourceDst), - E_SUCCESS); - - void* mkl_buf_output = - const_cast(static_cast(output->flat().data())); - lrn_res[dnnResourceDst] = mkl_buf_output; - - void* mkl_buf_workspace = nullptr; - if (workspace_enabled_) { - mkl_buf_workspace = const_cast( - static_cast(workspace->flat().data())); - } else { - AllocTmpBuffer(context, mkl_tmp_workspace_buf_tensor, - lt_internal_workspace, &mkl_buf_workspace); - } - lrn_res[dnnResourceWorkspace] = mkl_buf_workspace; - } - - // Fallback implementation - Taken from lrn_op.cc - // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a - // copy. - void MklDefaultToEigen(OpKernelContext* context, int depth_radius_, - float bias_, float alpha_, float beta_, - const Tensor& input) { - const int batch = static_cast(input.dim_size(0)); - const int rows = static_cast(input.dim_size(1)); - const int cols = static_cast(input.dim_size(2)); - const int depth = static_cast(input.dim_size(3)); - const int nodes = cols * rows; - - auto in_shaped = input.shaped({nodes * batch, depth}); - // Multiplying the input with the band matrix has the effect of reducing - // the - // correct patch along the depth. 
- Eigen::Tensor multiplier(depth, depth); - GetBandMatrix(depth, depth_radius_, &multiplier); - - Tensor *output, *workspace; - MklShape mkl_output_mkl_shape, mkl_workspace_mkl_shape; - mkl_output_mkl_shape.SetMklTensor(false); - mkl_output_mkl_shape.SetDimensions(4); - AllocateOutputSetMklShape(context, 0, &output, input.shape(), - mkl_output_mkl_shape); - - mkl_workspace_mkl_shape.SetMklTensor(false); - mkl_workspace_mkl_shape.SetDimensions(4); - AllocateOutputSetMklShape(context, 1, &workspace, input.shape(), - mkl_workspace_mkl_shape); - - auto out_shaped = output->shaped({nodes * batch, depth}); - Eigen::array dims = {{DimPair(1, 0)}}; - auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_; - if (beta_ == T(1)) { - out_shaped.device(context->eigen_cpu_device()) = - in_shaped * tmp.inverse(); - } else if (beta_ == T(0.5)) { - out_shaped.device(context->eigen_cpu_device()) = - in_shaped * tmp.rsqrt(); - } else { - out_shaped.device(context->eigen_cpu_device()) = - in_shaped * (tmp.log() * -beta_).exp(); - } - } - - // Release MKL resources. 
- void MklCleanup() { - dnnDelete_F32(lrn_fwd); - dnnLayoutDelete_F32(lt_internal_input); - dnnLayoutDelete_F32(lt_internal_workspace); - dnnLayoutDelete_F32(lt_internal_output); - } - } MklLRNOpContext; - - typedef typename Eigen::Tensor::DimensionPair DimPair; - - bool workspace_enabled_; - int depth_radius_; - float bias_; - float alpha_; - float beta_; -}; - -template -class MklLRNGradOp : public OpKernel { - public: - explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) { - int64 depth_radius64; - OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES( - context, - FastBoundsCheck(depth_radius64, std::numeric_limits::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); - depth_radius_ = static_cast(depth_radius64); - OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); - OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); - OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); - workspace_enabled_ = false; - OP_REQUIRES_OK(context, - context->GetAttr("workspace_enabled", &workspace_enabled_)); - } - - void Compute(OpKernelContext* context) override { - MklLRNGradOpContext mkl_context; - mkl_context.depth_radius_ = depth_radius_; - mkl_context.bias_ = bias_; - mkl_context.alpha_ = alpha_; - mkl_context.beta_ = beta_; - - const Tensor& in_grads = MklGetInput(context, 0); - const Tensor& in_image = MklGetInput(context, 1); - const Tensor& out_image = MklGetInput(context, 2); - - GetMklShape(context, 0, &mkl_context.ingrad_shape); - GetMklShape(context, 1, &mkl_context.inimage_shape); - GetMklShape(context, 2, &mkl_context.outimage_shape); - - bool ingrad_in_mkl_format = mkl_context.ingrad_shape.IsMklTensor(); - bool inimage_in_mkl_format = mkl_context.inimage_shape.IsMklTensor(); - bool outimage_in_mkl_format = mkl_context.outimage_shape.IsMklTensor(); - - mkl_context.in_dims = inimage_in_mkl_format - ? 
mkl_context.inimage_shape.GetDimension() - : in_image.dims(); - OP_REQUIRES(context, mkl_context.in_dims == 4, - errors::InvalidArgument("input images must be 4-dimensional")); - - if (!workspace_enabled_) { - mkl_context.MklDefaultToEigen(context); - return; - } - - if (ingrad_in_mkl_format || inimage_in_mkl_format) { - const MklShape* tmp_mkl_shape = (ingrad_in_mkl_format) - ? &mkl_context.ingrad_shape - : &mkl_context.inimage_shape; - if (tmp_mkl_shape->tf_dim_idx(mkl_context.in_dims - 1) != MklDims::C) { - // Fallback to eigen - mkl_context.MklDefaultToEigen(context); - return; - } else { // MKL supports normalization over channel dimension only - for (int i = 0; i < mkl_context.in_dims; i++) { - mkl_context.in_sizes[i] = mkl_context.out_sizes[i] = - tmp_mkl_shape->GetSizes()[i]; - mkl_context.in_strides[i] = mkl_context.out_strides[i] = - tmp_mkl_shape->GetStrides()[i]; - } - } - } else { - // Fallback to eigen - mkl_context.MklDefaultToEigen(context); - return; - } - - // Dimensions check for sanity purpose - if (ingrad_in_mkl_format) { - OP_REQUIRES( - context, mkl_context.ingrad_shape.GetDimension() == 4, - errors::InvalidArgument("input gradient must be 4-dimensional")); - } else { - OP_REQUIRES( - context, in_grads.dims() == 4, - errors::InvalidArgument("input gradient must be 4-dimensional")); - } - - if (outimage_in_mkl_format) { - OP_REQUIRES( - context, mkl_context.outimage_shape.GetDimension() == 4, - errors::InvalidArgument("Output image must be 4-dimensional")); - } else { - OP_REQUIRES( - context, out_image.dims() == 4, - errors::InvalidArgument("Output image must be 4-dimensional")); - } - - // Prepare mkl input layout - mkl_context.MklPrepareLRNInputsLayouts(context); - int ksize = 2 * depth_radius_ + 1; - - CHECK_EQ(dnnLRNCreateBackward_F32( - &mkl_context.lrn_bwd, NULL, mkl_context.lt_input, - mkl_context.lt_output, ksize, - static_cast(alpha_ * ksize), beta_, bias_), - E_SUCCESS); - - // Allocate output tensor and shape. 
- TensorShape mkl_output_tf_shape; /* First tensor */ - MklShape mkl_output_mkl_shape; /* Second tensor */ - mkl_output_mkl_shape.SetMklTensor(true); - CHECK_NE(mkl_context.lrn_bwd, nullptr); - mkl_output_mkl_shape.SetMklLayout(mkl_context.lrn_bwd, dnnResourceDiffSrc); - mkl_output_mkl_shape.SetTfLayout(mkl_context.in_dims, mkl_context.out_sizes, - mkl_context.out_strides); - if (ingrad_in_mkl_format) { - mkl_output_mkl_shape.SetTfDimOrder( - mkl_context.in_dims, mkl_context.ingrad_shape.GetTfToMklDimMap()); - } else { - mkl_output_mkl_shape.SetTfDimOrder( - mkl_context.in_dims, mkl_context.inimage_shape.GetTfToMklDimMap()); - } - mkl_output_tf_shape.AddDim( - dnnLayoutGetMemorySize_F32( - static_cast(mkl_output_mkl_shape.GetMklLayout())) / - sizeof(T)); - Tensor* output = nullptr; - AllocateOutputSetMklShape(context, 0, &output, mkl_output_tf_shape, - mkl_output_mkl_shape); - - // Get pointers to output data. - void* user_output = - const_cast(static_cast(output->flat().data())); - - Tensor mkl_tmp_input_buf_tensor, mkl_tmp_image_buf_tensor, - mkl_tmp_outimage_buf_tensor; - // Convert Inputs if needed - mkl_context.MklPrepareLRNGradInput(context, &mkl_tmp_input_buf_tensor, - &mkl_tmp_image_buf_tensor, - &mkl_tmp_outimage_buf_tensor); - - // We do not do any conversion for output. But we simply emit it - // in MKL format. - mkl_context.res_lrn_bwd[dnnResourceDiffSrc] = user_output; - // Execute LRN backward using dnnExecute - CHECK_EQ(dnnExecute_F32(mkl_context.lrn_bwd, mkl_context.res_lrn_bwd), - E_SUCCESS); - // Release MKL resources. 
- mkl_context.Mklcleanup(); - } - - private: - typedef struct { - int depth_radius_; - float bias_; - float alpha_; - float beta_; - size_t in_dims; - size_t in_sizes[4]; - size_t in_strides[4]; - size_t out_sizes[4]; - size_t out_strides[4]; - MklShape ingrad_shape, inimage_shape, outimage_shape; - dnnPrimitive_t lrn_bwd = nullptr; - dnnPrimitive_t convert_input = nullptr; - dnnLayout_t lt_input = nullptr; - dnnLayout_t lt_output = nullptr; - dnnLayout_t lt_bdw_input = nullptr; - dnnLayout_t lt_workspace = nullptr; - dnnLayout_t lt_internal_input = nullptr; - void* res_lrn_bwd[dnnResourceNumber]; - - // prepare mkl input - void MklPrepareLRNInputsLayouts(OpKernelContext* context) { - bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor(); - bool inimage_in_mkl_format = inimage_shape.IsMklTensor(); - if (!ingrad_in_mkl_format) { - CHECK_EQ(dnnLayoutCreate_F32(<_input, in_dims, in_sizes, in_strides), - E_SUCCESS); - } else { - lt_input = static_cast(ingrad_shape.GetCurLayout()); - } - - if (!inimage_in_mkl_format) { - CHECK_EQ( - dnnLayoutCreate_F32(<_output, in_dims, out_sizes, out_strides), - E_SUCCESS); - } else { - lt_output = static_cast(inimage_shape.GetCurLayout()); - } - } - - // convert input if needed - void MklPrepareLRNGradInput(OpKernelContext* context, - Tensor* mkl_tmp_input_buf_tensor, - Tensor* mkl_tmp_image_buf_tensor, - Tensor* mkl_tmp_outimage_buf_tensor) { - const Tensor& in_grads = MklGetInput(context, 0); - const Tensor& in_image = MklGetInput(context, 1); - const Tensor& workspace = MklGetInput( - context, - 3); /*Worskpsace is enabled, get the buffer to the workspace */ - - void* user_input = const_cast( - static_cast(in_grads.flat().data())); - void* user_fwd_input = const_cast( - static_cast(in_image.flat().data())); - void* workspace_buffer = const_cast( - static_cast(workspace.flat().data())); - - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_workspace, lrn_bwd, - dnnResourceWorkspace), - E_SUCCESS); - 
CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_bdw_input, lrn_bwd, - dnnResourceDiffDst), - E_SUCCESS); - CHECK_EQ(dnnLayoutCreateFromPrimitive_F32(<_internal_input, lrn_bwd, - dnnResourceSrc), - E_SUCCESS); - - bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor(); - if (ingrad_in_mkl_format) { - if (!dnnLayoutCompare_F32(lt_bdw_input, lt_input)) { - AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_bdw_input, - &res_lrn_bwd[dnnResourceDiffDst]); - ingrad_shape.GetConvertedFlatData(lt_bdw_input, user_input, - res_lrn_bwd[dnnResourceDiffDst]); - } else { - res_lrn_bwd[dnnResourceDiffDst] = user_input; - } - } else { - if (!dnnLayoutCompare_F32(lt_bdw_input, lt_input)) { - CHECK_EQ( - dnnConversionCreate_F32(&convert_input, lt_input, lt_bdw_input), - E_SUCCESS); - - AllocTmpBuffer(context, mkl_tmp_input_buf_tensor, lt_bdw_input, - &res_lrn_bwd[dnnResourceDiffDst]); - CHECK_EQ(dnnConversionExecute_F32(convert_input, user_input, - res_lrn_bwd[dnnResourceDiffDst]), - E_SUCCESS); - dnnDelete_F32(convert_input); - } else { - res_lrn_bwd[dnnResourceDiffDst] = user_input; - } - } - - bool inimage_in_mkl_format = inimage_shape.IsMklTensor(); - if (inimage_in_mkl_format) { - if (!dnnLayoutCompare_F32( - lt_internal_input, - static_cast(inimage_shape.GetCurLayout()))) { - AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input, - &res_lrn_bwd[dnnResourceSrc]); - ingrad_shape.GetConvertedFlatData(lt_internal_input, user_fwd_input, - res_lrn_bwd[dnnResourceSrc]); - } else { - res_lrn_bwd[dnnResourceSrc] = user_fwd_input; - } - } else { - if (!dnnLayoutCompare_F32( - lt_internal_input, - static_cast(inimage_shape.GetCurLayout()))) { - CHECK_EQ(dnnConversionCreate_F32( - &convert_input, - static_cast(inimage_shape.GetCurLayout()), - lt_internal_input), - E_SUCCESS); - - AllocTmpBuffer(context, mkl_tmp_image_buf_tensor, lt_internal_input, - &res_lrn_bwd[dnnResourceSrc]); - CHECK_EQ(dnnConversionExecute_F32(convert_input, user_fwd_input, - res_lrn_bwd[dnnResourceSrc]), 
- E_SUCCESS); - dnnDelete_F32(convert_input); - } else { - res_lrn_bwd[dnnResourceSrc] = user_fwd_input; - } - } - - res_lrn_bwd[dnnResourceWorkspace] = workspace_buffer; - } - - // Fallback implementation - Taken from lrn_op.cc - // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a - // copy. - void MklDefaultToEigen(OpKernelContext* context) { - Tensor in_grads; - Tensor in_image; - Tensor out_image; - - GetMklShape(context, 0, &ingrad_shape); - GetMklShape(context, 1, &inimage_shape); - GetMklShape(context, 2, &outimage_shape); - - if (ingrad_shape.IsMklTensor()) { - in_grads = - ConvertMklToTF(context, MklGetInput(context, 0), ingrad_shape); - } else { - in_grads = MklGetInput(context, 0); - } - - if (inimage_shape.IsMklTensor()) { - in_image = - ConvertMklToTF(context, MklGetInput(context, 1), inimage_shape); - } else { - in_image = MklGetInput(context, 1); - } - - if (outimage_shape.IsMklTensor()) { - out_image = - ConvertMklToTF(context, MklGetInput(context, 2), outimage_shape); - } else { - out_image = MklGetInput(context, 2); - } - - const int64 batch = static_cast(in_grads.dim_size(0)); - const int64 rows = static_cast(in_grads.dim_size(1)); - const int64 cols = static_cast(in_grads.dim_size(2)); - const int64 depth = static_cast(in_grads.dim_size(3)); - const auto nodes = cols * rows; - - auto grads_shaped = in_grads.shaped({nodes * batch, depth}); - - auto in_shaped = in_image.shaped({nodes * batch, depth}); - auto activations = out_image.shaped({nodes * batch, depth}); - - Tensor* output; - MklShape mkl_output_mkl_shape; - mkl_output_mkl_shape.SetMklTensor(false); - mkl_output_mkl_shape.SetDimensions(4); - AllocateOutputSetMklShape(context, 0, &output, in_grads.shape(), - mkl_output_mkl_shape); - - auto out_shaped = output->shaped({nodes * batch, depth}); - out_shaped.setZero(); - auto shard = [this, activations, in_shaped, grads_shaped, out_shaped, - depth](int64 begin, int64 end) { - for (int64 i = begin; i < end; ++i) { - for 
(int64 j = 0; j < depth; ++j) { - int64 depth_begin = std::max(0, j - depth_radius_); - int64 depth_end = std::min(depth, j + depth_radius_ + 1); - - T norm(0); - for (int64 k = depth_begin; k < depth_end; ++k) { - norm += in_shaped(i, k) * in_shaped(i, k); - } - norm = alpha_ * norm + bias_; - DCHECK_GT(norm, T(1e-6)); - for (int64 k = depth_begin; k < depth_end; ++k) { - T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) * - activations(i, j) / norm; - if (k == j) { - dyi += Eigen::numext::pow(norm, -beta_); - } - dyi *= grads_shaped(i, j); - const_cast::Tensor&>(out_shaped)(i, k) += - dyi; - } - } - } - }; - auto worker_threads = - *(context->device()->tensorflow_cpu_worker_threads()); - Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch, - depth * depth, shard); - } - - // release mkl resources - void Mklcleanup() { - bool ingrad_in_mkl_format = ingrad_shape.IsMklTensor(); - bool inimage_in_mkl_format = inimage_shape.IsMklTensor(); - if (!ingrad_in_mkl_format) { - CHECK_EQ(dnnLayoutDelete_F32(lt_input), E_SUCCESS); - } - - if (!inimage_in_mkl_format) { - CHECK_EQ(dnnLayoutDelete_F32(lt_output), E_SUCCESS); - } - dnnDelete_F32(lrn_bwd); - dnnLayoutDelete_F32(lt_bdw_input); - dnnLayoutDelete_F32(lt_workspace); - } - } MklLRNGradOpContext; - - typedef typename Eigen::Tensor::DimensionPair DimPair; - bool workspace_enabled_; - int depth_radius_; - float bias_; - float alpha_; - float beta_; -}; - -#else - template class MklLRNOp : public OpKernel { public: @@ -847,7 +175,6 @@ class MklLRNOp : public OpKernel { MklDnnData* src_dnn_data, MklDnnData* dst_dnn_data, MklDnnData* wksp_dnn_data = nullptr) { - // Check for input reorder src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc()); @@ -1160,7 +487,6 @@ class MklLRNGradOp : public OpKernel { MklDnnData* output_diff_src, const memory::primitive_desc& target_diff_dst_pd, const MklDnnData* workspace_dnn_data = nullptr) { - // Check for input reordering on the diff dst input 
input_gradient_diff_dst->CheckReorderToOpMem( lrn_bkwd_desc.diff_dst_primitive_desc()); @@ -1345,8 +671,6 @@ class MklLRNGradOp : public OpKernel { float beta_; }; -#endif // INTEL_MKL_ML_ONLY - #define REGISTER_MKL_LRN_CPU(T) \ REGISTER_KERNEL_BUILDER(Name("_MklLRN") \ .Device(DEVICE_CPU) \ diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 38fe45936a..bee0add89c 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2172,11 +2172,7 @@ REGISTER_OP("_MklLRN") .Input("input: T") .Input("mkl_input: uint8") .Output("output: T") -#ifdef INTEL_MKL_ML_ONLY - .Output("workspace: T") -#else .Output("workspace: uint8") -#endif .Output("mkl_output: uint8") .Output("mkl_workspace: uint8") .Attr("depth_radius: int = 5") @@ -2200,11 +2196,7 @@ REGISTER_OP("_MklLRNGrad") .Input("input_grads: T") .Input("input_image: T") .Input("output_image: T") -#ifdef INTEL_MKL_ML_ONLY - .Input("workspace: T") -#else .Input("workspace: uint8") -#endif .Input("mkl_input_grads: uint8") .Input("mkl_input_image: uint8") .Input("mkl_output_image: uint8") -- GitLab From 3f9564a8b901c94eab2a21a764d8e177a45af12f Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 22 Oct 2018 14:12:18 -0700 Subject: [PATCH 0092/1078] Merging confusion_matrix naming --- tensorflow/python/ops/confusion_matrix.py | 3 +-- tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py index 8259142456..3c55ae68ac 100644 --- a/tensorflow/python/ops/confusion_matrix.py +++ b/tensorflow/python/ops/confusion_matrix.py @@ -90,8 +90,7 @@ def remove_squeezable_dimensions( return labels, predictions -@tf_export('train.confusion_matrix', 'confusion_matrix') -@deprecation.deprecated_endpoints('confusion_matrix') +@tf_export('confusion_matrix') def confusion_matrix(labels, predictions, num_classes=None, dtype=dtypes.int32, name=None, 
weights=None): """Computes the confusion matrix from predictions and labels. diff --git a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt index 45c81fdd3b..9f35395284 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.train.pbtxt @@ -272,10 +272,6 @@ tf_module { name: "checkpoint_exists" argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "confusion_matrix" - argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\', \'None\'], " - } member_method { name: "cosine_decay" argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " -- GitLab From 9fa2e774d3aa3f53592cf5e0d3fe26cb40e3d6a1 Mon Sep 17 00:00:00 2001 From: Anna Revinskaya Date: Mon, 22 Oct 2018 18:47:25 -0700 Subject: [PATCH 0093/1078] Removed unused import --- tensorflow/python/ops/confusion_matrix.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/confusion_matrix.py b/tensorflow/python/ops/confusion_matrix.py index 3c55ae68ac..c09154129f 100644 --- a/tensorflow/python/ops/confusion_matrix.py +++ b/tensorflow/python/ops/confusion_matrix.py @@ -26,7 +26,6 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import sparse_ops -from tensorflow.python.util import deprecation from tensorflow.python.util.tf_export import tf_export -- GitLab From 185ae29da792ee8d42fa153e819c75787717174e Mon Sep 17 00:00:00 2001 From: Anna Revinskaya Date: Mon, 22 Oct 2018 19:06:15 -0700 Subject: [PATCH 0094/1078] Update V2 golden as well --- tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt | 4 ---- 1 file changed, 
4 deletions(-) diff --git a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt index 7e980fe44d..cb6da5088b 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.train.pbtxt @@ -252,10 +252,6 @@ tf_module { name: "checkpoint_exists" argspec: "args=[\'checkpoint_prefix\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "confusion_matrix" - argspec: "args=[\'labels\', \'predictions\', \'num_classes\', \'dtype\', \'name\', \'weights\'], varargs=None, keywords=None, defaults=[\'None\', \"\", \'None\', \'None\'], " - } member_method { name: "cosine_decay" argspec: "args=[\'learning_rate\', \'global_step\', \'decay_steps\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.0\', \'None\'], " -- GitLab From c0814e3861c0b88caebc24d4ef1ce5e61a213f2e Mon Sep 17 00:00:00 2001 From: Rholais Lii Date: Tue, 23 Oct 2018 13:44:40 +0800 Subject: [PATCH 0095/1078] Fix comments to match usage Fix comments of `sparse_softmax_cross_entropy_with_logits` to match usage. --- tensorflow/python/ops/nn_ops.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index e31d162285..2477271a22 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1980,8 +1980,9 @@ def sparse_softmax_cross_entropy_with_logits( on `logits` internally for efficiency. Do not call this op with the output of `softmax`, as it will produce incorrect results. - A common use case is to have logits and labels of shape - `[batch_size, num_classes]`, but higher dimensions are supported, in which + A common use case is to have logits of shape + `[batch_size, num_classes]` and have labels of shape + `[batch_size]`, but higher dimensions are supported, in which case the `dim`-th dimension is assumed to be of size `num_classes`. 
`logits` must have the dtype of `float16`, `float32`, or `float64`, and `labels` must have the dtype of `int32` or `int64`. -- GitLab From 67e2c47e2ff1222e141c51ac2794aa9b9207a573 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Wed, 24 Oct 2018 13:30:35 -0700 Subject: [PATCH 0096/1078] Disable denormal test on ARM until the architecture is supported. --- tensorflow/python/kernel_tests/denormal_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/denormal_test.py b/tensorflow/python/kernel_tests/denormal_test.py index 71a528c4aa..9f65a18c14 100644 --- a/tensorflow/python/kernel_tests/denormal_test.py +++ b/tensorflow/python/kernel_tests/denormal_test.py @@ -35,8 +35,8 @@ class DenormalTest(test.TestCase): self.assertEqual(tiny, tiny / 16 * 16) def _flushDenormalsTest(self, use_gpu, dtypes): - if platform.machine() == "ppc64le" or platform.machine() == "s390x": - # Disabled denormal_test on power/s390x platform + if platform.machine() == "ppc64le" or platform.machine() == "s390x" or platform.machine() == "aarch64": + # Disabled denormal_test on power/s390x/aarch64 platform # Check relevant discussion - https://github.com/tensorflow/tensorflow/issues/11902 return with self.cached_session(use_gpu=use_gpu): -- GitLab From da1b48ddd04875995098f3c5c3fe0740b72518b8 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Wed, 24 Oct 2018 16:39:49 -0700 Subject: [PATCH 0097/1078] Declare that stateless random ops are not differentiable in C++ code. 
(#23227) PiperOrigin-RevId: 215935319 --- tensorflow/core/BUILD | 1 + tensorflow/core/ops/stateless_random_grad.cc | 23 ++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tensorflow/core/ops/stateless_random_grad.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 6a3ee3c1cb..900a0e11c4 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1242,6 +1242,7 @@ cc_library( srcs = [ "ops/math_grad.cc", "ops/random_grad.cc", + "ops/stateless_random_grad.cc", ], linkstatic = 1, # Needed since alwayslink is broken in bazel b/27630669 visibility = ["//visibility:public"], diff --git a/tensorflow/core/ops/stateless_random_grad.cc b/tensorflow/core/ops/stateless_random_grad.cc new file mode 100644 index 0000000000..331e1d0152 --- /dev/null +++ b/tensorflow/core/ops/stateless_random_grad.cc @@ -0,0 +1,23 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/function.h" + +namespace tensorflow { +REGISTER_OP_NO_GRADIENT("StatelessRandomUniform"); +REGISTER_OP_NO_GRADIENT("StatelessRandomNormal"); +REGISTER_OP_NO_GRADIENT("StatelessTruncatedNormal"); +REGISTER_OP_NO_GRADIENT("StatelessMultinomial"); +} // end namespace tensorflow -- GitLab From e72c9ebe78a119715541f40ea99b1a8c89639968 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Wed, 24 Oct 2018 17:46:03 -0700 Subject: [PATCH 0098/1078] 1.12.0-rc2 cherry-pick request: Various XLA scatter improvements. (#23235) * [XLA] Update Tf2Xla bridge to use Scatter HLO. PiperOrigin-RevId: 215687800 * [XLA:GPU] Add an implementation of scatter for GPU This simple has a kernel that runs on every element of the updates tensor, figure out the right indices to perform the update, and applies it with an atomic operation. Currently we emit a CAS for plain (i.e. non-add) updates, which is inefficient. Also TuplePointsToAnalysis doesn't know that it should alias the operand and output buffers of a scatter, which would avoid a copy. PiperOrigin-RevId: 216412467 * [XLA] Allow scatter to share the operand buffer with the output This avoids a copy. PiperOrigin-RevId: 216437329 * [XLA:GPU] Elide the SequentialThunk when emitting scatter with no copy We have a 1-element thunk sequence if we're not copying. That's still two thunks and hlo profiling gets confused if it sees two thunks for the same instruction and one of them claims to be the whole instruction. PiperOrigin-RevId: 216448063 * [XLA:GPU] Allow input fusion into scatter We fuse everything into the scatter now, and emit two kernels. The first kernel fills the output buffer with the computation fused into the scatter operand. The second kernel is a regular scatter, which also contains the fused operations from the updates and scatter_indices inputs. 
PiperOrigin-RevId: 216624225 * [XLA:GPU] Adding a test case for Scatter where GPU implementation fails. PiperOrigin-RevId: 216798034 * [XLA:GPU] Fix scatter oob check computation This was comparing the index after adding it to the window, and then comparing against the window dimension. This means that the bounds check was only correct for the first element of a window. Instead compare the scatter index, which is the same for all elements of a window. PiperOrigin-RevId: 216921512 * [XLA:GPU] Elide tuple roots of the entry computation The tuple buffer is never read, so stop emitting code to fill it. A typical root tuple consists of a H2D memcpy and a host callback, both of which are somewhat slow. This helps tiny models and inference benchmarks, where the host/device syncs can be a significant part of the runtime of the entire computation. PiperOrigin-RevId: 216968475 --- tensorflow/compiler/tf2xla/lib/scatter.cc | 213 +++++++++------- tensorflow/compiler/tf2xla/lib/scatter.h | 6 +- tensorflow/compiler/xla/client/xla_builder.cc | 3 + tensorflow/compiler/xla/service/gpu/BUILD | 1 - .../xla/service/gpu/instruction_fusion.cc | 9 +- .../service/gpu/instruction_fusion_test.cc | 39 +++ .../xla/service/gpu/ir_emitter_unnested.cc | 241 +++++++++++++++++- .../xla/service/gpu/ir_emitter_unnested.h | 9 + .../xla/service/gpu/nvptx_compiler.cc | 3 - .../xla/service/hlo_dataflow_analysis.cc | 1 + .../xla/service/hlo_dataflow_analysis_test.cc | 38 +++ .../compiler/xla/service/hlo_matchers.h | 1 + tensorflow/compiler/xla/service/hlo_module.cc | 3 +- tensorflow/compiler/xla/service/inliner.cc | 32 ++- .../compiler/xla/service/inliner_test.cc | 30 +++ .../compiler/xla/service/layout_assignment.cc | 2 +- .../xla/service/tuple_points_to_analysis.cc | 1 + .../service/tuple_points_to_analysis_test.cc | 38 +++ tensorflow/compiler/xla/tests/scatter_test.cc | 62 +++++ 19 files changed, 613 insertions(+), 119 deletions(-) diff --git a/tensorflow/compiler/tf2xla/lib/scatter.cc 
b/tensorflow/compiler/tf2xla/lib/scatter.cc index 38dfde165d..2b1c2ced92 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.cc +++ b/tensorflow/compiler/tf2xla/lib/scatter.cc @@ -38,12 +38,10 @@ xla::StatusOr XlaScatter( combiner, xla::XlaBuilder* builder) { TF_ASSIGN_OR_RETURN(xla::Shape buffer_shape, builder->GetShape(buffer)); - TF_RETURN_IF_ERROR(builder->GetShape(updates).status()); + TF_ASSIGN_OR_RETURN(xla::Shape updates_shape, builder->GetShape(updates)); TF_ASSIGN_OR_RETURN(xla::Shape indices_shape, builder->GetShape(indices)); absl::Span indices_dims = xla::AsInt64Slice(indices_shape.dimensions()); - absl::Span buffer_dims = - xla::AsInt64Slice(buffer_shape.dimensions()); // If the indices are N-dimensional, the minor dimension of indices contains // the indices to update. Otherwise the indices are all scalars. @@ -81,104 +79,129 @@ xla::StatusOr XlaScatter( } } - // Shape of the non-indexed dimensions of the buffer. - std::vector buffer_shape_post_axes( - buffer_dims.begin() + num_index_dims, buffer_dims.end()); - - // Flatten the major dimensions of indices and updates into a single dimension - // for ease of iteration. 
- std::vector flat_indices_shape({num_indices}); - if (indices_are_vectors) { - flat_indices_shape.push_back(num_index_dims); + // Example of a 1-D scatter that updates two [3,1] tensors in a tensor of + // shape [3,3]: + // NOTE: ***This case will not be generated by any of the tf.scatter ops.*** + // + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // updates = s32[3,2] parameter(2) + // scatter = s32[3,3] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={0}, + // inserted_window_dims={1}, + // scatter_dims_to_operand_dims={1}, + // index_vector_dim=1 + // + // + // Example of a 1-D scatter that updates two [1,3] tensors in a tensor of + // shape [3,3]: + // + // operand = s32[3,3] parameter(0) + // indices = s32[2] parameter(1) + // updates = s32[2,3] parameter(2) + // scatter = s32[3,3] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={1}, + // inserted_window_dims={0}, + // scatter_dims_to_operand_dims={0}, + // index_vector_dim=1 + // + // + // Example of an N-D scatter updating slices of shape [1,1,2] in a tensor of + // shape [3,3,2] + // + // operand = s32[3,3,2] parameter(0) + // indices = s32[2,2] parameter(1) + // updates = s32[2,2] parameter(2) + // scatter = s32[3,3,2] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={1}, + // inserted_window_dims={0,1}, + // scatter_dims_to_operand_dims={0,1}, + // index_vector_dim=1 + // + // + // Example of a scatter updating slices of shape [] in a tensor of shape [1,1] + // + // operand = s32[1,1] parameter(0) + // indices = s32[1] parameter(1) + // updates = s32[1] parameter(2) + // scatter = s32[1,1] scatter(operand, indices, updates), + // to_apply=update_computation, + // update_window_dims={}, + // inserted_window_dims={0,1}, + // scatter_dims_to_operand_dims={0}, + // index_vector_dim=1 + // Note that updates operand would be broadcasted into [1] in 
this case. + // + + xla::ScatterDimensionNumbers dim_numbers; + dim_numbers.set_index_vector_dim(indices_are_vectors + ? indices_shape.dimensions_size() - 1 + : indices_shape.dimensions_size()); + + int64 updates_rank = xla::ShapeUtil::Rank(updates_shape); + int64 buffer_rank = xla::ShapeUtil::Rank(buffer_shape); + int64 num_window_dims_in_updates = buffer_rank - num_index_dims; + + // If the rank of `updates` is 0 and does not match the expected rank of + // updates, broadcast `updates` to the expected shape of updates. + auto new_updates = updates; + std::vector expected_updates_dims(indices_dims.begin(), + indices_dims.end()); + for (int64 dim = num_index_dims; dim < buffer_rank; ++dim) { + expected_updates_dims.push_back(buffer_shape.dimensions(dim)); + } + int64 expected_updates_rank = expected_updates_dims.size(); + if (updates_rank == 0 && expected_updates_rank != 0) { + new_updates = xla::Broadcast(updates, expected_updates_dims); + TF_ASSIGN_OR_RETURN(updates_shape, builder->GetShape(new_updates)); + updates_rank = xla::ShapeUtil::Rank(updates_shape); } - std::vector flat_updates_shape({num_indices}); - flat_updates_shape.insert(flat_updates_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - - // Construct the initial values of the loop-carried Tensors. - auto flat_indices = xla::Reshape(indices, flat_indices_shape); - auto flat_updates = xla::Reshape(updates, flat_updates_shape); - auto init = {flat_indices, flat_updates, buffer}; - - // Constructs the loop body. 
The implementation of scatter is essentially: - // for i in range(num_indices): - // index = dynamic-slice(indices, i) - // update = dynamic-slice(updates, i) - // buffer = dynamic-update-slice(buffer, update, index) - auto body_fn = [&](xla::XlaOp i, absl::Span loop_vars, - xla::XlaBuilder* body_builder) { - auto indices = loop_vars[0]; - auto updates = loop_vars[1]; - auto buffer = loop_vars[2]; - - auto zero_index = xla::ConstantLiteral( - body_builder, xla::LiteralUtil::Zero(indices_shape.element_type())); - - // Slice the i-th index from the indices array. - xla::XlaOp index; - auto indices_offset = xla::Reshape(i, {1}); - if (indices_are_vectors) { - indices_offset = xla::Pad(indices_offset, zero_index, - xla::MakeEdgePaddingConfig({{0, 1}})); - - index = xla::DynamicSlice(indices, indices_offset, {1, num_index_dims}); - index = xla::Collapse(index, {0, 1}); - } else { - index = xla::DynamicSlice(indices, indices_offset, {1}); + if (updates_rank > 0) { + for (int64 i = (updates_rank - num_window_dims_in_updates); + i < updates_rank; ++i) { + dim_numbers.add_update_window_dims(i); } + } - // Discard updates with negative indices, since some users expect this. - auto index_in_range = xla::ReduceAll( - xla::Le(zero_index, index), xla::ConstantR0(body_builder, true), - xla::CreateScalarAndComputation(xla::PRED, body_builder)); - - // Make the index in bounds to prevent implementation defined behavior. - index = xla::Max(index, zero_index); - index = xla::Pad( - index, zero_index, - xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); - - // Slice the i-th index from the updates array. 
- auto updates_offset = xla::Reshape(i, {1}); - updates_offset = xla::Pad( - updates_offset, zero_index, - xla::MakeEdgePaddingConfig({{0, buffer_shape_post_axes.size()}})); - std::vector flat_updates_slice_shape({1}); - flat_updates_slice_shape.insert(flat_updates_slice_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - auto update = - xla::DynamicSlice(updates, updates_offset, flat_updates_slice_shape); - - // Unflatten the major (iteration) dimensions of the slice to their - // original shape. - std::vector updates_slice_shape(num_index_dims, 1); - updates_slice_shape.insert(updates_slice_shape.end(), - buffer_shape_post_axes.begin(), - buffer_shape_post_axes.end()); - update = xla::Reshape(update, updates_slice_shape); - - // Apply the update to the buffer. If there is a combiner, use it to merge - // the current values with the update. - auto current_value = xla::DynamicSlice(buffer, index, updates_slice_shape); + for (int64 i = 0; i < num_index_dims; ++i) { + dim_numbers.add_inserted_window_dims(i); + dim_numbers.add_scatter_dims_to_operand_dims(i); + } + + // Build the combiner computation. + xla::XlaComputation combiner_computation; + { + xla::XlaBuilder cb("scatter-combiner"); + auto xla_scalar_shape = + xla::ShapeUtil::MakeShape(buffer_shape.element_type(), {}); + auto p0 = xla::Parameter(&cb, 0, xla_scalar_shape, "p0"); + auto p1 = xla::Parameter(&cb, 1, xla_scalar_shape, "p1"); if (combiner) { - update = combiner(current_value, update, body_builder); + combiner(p0, p1, &cb); } - // Use the current value instead of the update if the index is out of - // bounds. - update = xla::Select(index_in_range, update, current_value); - // Apply the update. 
- buffer = xla::DynamicUpdateSlice(buffer, update, index); - - return std::vector{indices, updates, buffer}; - }; - - TF_ASSIGN_OR_RETURN(auto outputs, - XlaForEachIndex(num_indices, indices_shape.element_type(), - body_fn, init, "scatter", builder)); - return outputs[2]; + combiner_computation = cb.Build().ConsumeValueOrDie(); + } + + VLOG(3) << "Scatter op:"; + VLOG(3) << " Input: " << xla::ShapeUtil::HumanString(buffer_shape); + VLOG(3) << " Indices: " << xla::ShapeUtil::HumanString(indices_shape); + VLOG(3) << " Updates: " << xla::ShapeUtil::HumanString(updates_shape); + VLOG(3) << " Scatter Dimension Numbers: "; + VLOG(3) << " index_vector_dim: " << dim_numbers.index_vector_dim(); + VLOG(3) << " update_window_dims: [" + << absl::StrJoin(dim_numbers.update_window_dims(), ",") << "]"; + VLOG(3) << " inserted_window_dims: [" + << absl::StrJoin(dim_numbers.inserted_window_dims(), ",") << "]"; + VLOG(3) << " scatter_dims_to_operand_dims: [" + << absl::StrJoin(dim_numbers.scatter_dims_to_operand_dims(), ",") + << "]"; + + return xla::Scatter(buffer, indices, new_updates, combiner_computation, + dim_numbers); } } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/lib/scatter.h b/tensorflow/compiler/tf2xla/lib/scatter.h index 13a5f1b850..4cf478c4b9 100644 --- a/tensorflow/compiler/tf2xla/lib/scatter.h +++ b/tensorflow/compiler/tf2xla/lib/scatter.h @@ -34,7 +34,11 @@ namespace tensorflow { // Otherwise, `indices_are_vectors`, then indices are multidimensional and the // minor dimension of `indices` represents a vector of indices. // -// If any indices are negative, the corresponding update is discarded. +// If `updates` is a scalar, then it will be broadcasted into the expected shape +// of updates. +// +// If any part of the update region is out-of-bounds, the corresponding update +// is discarded. // // If a `combiner` is provided, updates are combined with the existing values in // the buffer using the combiner function. 
Otherwise, the updates replace the diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index e0ec91dba1..d196252db1 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -208,6 +208,9 @@ void XlaBuilder::IsConstantVisitor(const int64 op_handle, case HloOpcode::kWhile: // TODO(b/32495713): We aren't checking the condition and body // computations themselves. + case HloOpcode::kScatter: + // TODO(b/32495713): We aren't checking the embedded computation in + // Scatter. case HloOpcode::kSend: case HloOpcode::kRecv: case HloOpcode::kParameter: diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index a838464cae..dde0cc7459 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -704,7 +704,6 @@ cc_library( "//tensorflow/compiler/xla/service:llvm_compiler", "//tensorflow/compiler/xla/service:reduce_precision_insertion", "//tensorflow/compiler/xla/service:reshape_mover", - "//tensorflow/compiler/xla/service:scatter_expander", "//tensorflow/compiler/xla/service:transpose_folding", "//tensorflow/compiler/xla/service:tuple_simplifier", "//tensorflow/compiler/xla/service:while_loop_constant_sinking", diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index b61f038739..1d66787d89 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -47,6 +47,7 @@ bool IsFusible(const HloInstruction& hlo) { hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kReduceWindow || hlo.opcode() == HloOpcode::kReshape || + hlo.opcode() == HloOpcode::kScatter || hlo.opcode() == HloOpcode::kSlice || hlo.opcode() == HloOpcode::kTranspose; } @@ -223,6 +224,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* 
consumer, return false; } + // Scatter is only supported at the root of a kInput fusion. + if (producer->opcode() == HloOpcode::kScatter) { + return false; + } + // Do not fuse into reduce input fusions if the resulting kernel would suffer // from poor data locality (due to unfriendly input layouts). if (IsInputFusibleReduction(*consumer) && @@ -285,7 +291,8 @@ bool GpuInstructionFusion::ShouldFuseIntoMultiOutput(HloInstruction* consumer, HloInstruction::FusionKind GpuInstructionFusion::ChooseKind( const HloInstruction* producer, const HloInstruction* consumer) { - if (IsReductionToVector(*consumer)) { + if (IsReductionToVector(*consumer) || + consumer->opcode() == HloOpcode::kScatter) { return HloInstruction::FusionKind::kInput; } if (producer->opcode() == HloOpcode::kDot || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 96bfe0c12e..fd9b7cee80 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -709,5 +709,44 @@ TEST_F(InstructionFusionTest, AvoidsLargeFusion) { } } +TEST_F(InstructionFusionTest, FuseIntoScatter) { + auto module = ParseHloString(R"( + HloModule test_module + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY FuseIntoScatter { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=add, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + ROOT add = s32[3,3] add(scatter, scatter) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = 
module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Add(op::Fusion(), op::Fusion())); + EXPECT_EQ(root->operand(0)->fusion_kind(), + HloInstruction::FusionKind::kInput); + EXPECT_THAT(root->operand(0)->fused_expression_root(), + op::Scatter(op::Add(), op::Add(), op::Add())); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index c792dd2ddb..2951f7a65f 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -493,13 +493,68 @@ Status IrEmitterUnnested::HandleFft(HloInstruction* fft) { Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { HloInstruction* root = fusion->fused_expression_root(); - // HandleFusion specializes reduction from a multi-dimensional array to a 1D - // array. The specialized version requires a initializer thunk that - // initializes the output array to the initial value of the reduce. if (HloInstruction::FusionKind::kInput == fusion->fusion_kind()) { switch (root->opcode()) { + case HloOpcode::kScatter: { + std::vector> thunks; + // The initialization from 'operand' is using different loop bounds, so + // emit it in a separate kernel. Treat it like a loop fusion, writing to + // the output buffer. 
+ { + int unroll_factor = ComputeMaxUnrollFactor(fusion); + thunks.push_back(BuildKernelThunk( + fusion, /*implements_whole_instruction=*/false, unroll_factor)); + + std::vector operand_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + operand_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter operand_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter operand_fused_emitter(operand_parameter_arrays, + &operand_elemental_emitter); + TF_RETURN_IF_ERROR( + root->mutable_operand(0)->Accept(&operand_fused_emitter)); + + TF_RETURN_IF_ERROR(EmitTargetElementLoopInThunk( + *fusion, operand_fused_emitter.GetGenerator(root->operand(0)), + static_cast(thunks.back().get()))); + } + + // Now build the actual scatter, reading and writing to the freshly + // filled output buffer. + { + thunks.push_back( + BuildKernelThunk(fusion, + /*implements_whole_instruction=*/false)); + // Spin up a new fused emitter for the scatter kernel and emit it. 
+ std::vector scatter_parameter_arrays; + for (HloInstruction* operand : fusion->operands()) { + scatter_parameter_arrays.push_back(GetIrArray(*operand, *fusion)); + } + GpuElementalIrEmitter scatter_elemental_emitter( + hlo_module_config_, ir_emitter_context_->llvm_module(), &b_, + GetNestedComputer()); + FusedIrEmitter scatter_fused_emitter(scatter_parameter_arrays, + &scatter_elemental_emitter); + TF_RETURN_IF_ERROR(root->Accept(&scatter_fused_emitter)); + TF_RETURN_IF_ERROR(EmitScatter( + thunks.back().get(), root, + /*scatter_indices_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(1)), + /*updates_gen=*/ + scatter_fused_emitter.GetGenerator(root->operand(2)))); + } + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), fusion)); + return Status::OK(); + } case HloOpcode::kTuple: case HloOpcode::kReduce: { + // HandleFusion specializes reduction from a multi-dimensional array to + // a 1D array. The specialized version requires a initializer thunk that + // initializes the output array to the initial value of the reduce. if (root->opcode() == HloOpcode::kReduce && ShapeUtil::IsTuple(root->shape())) { // TODO(b/112040122): Support variadic reduce. @@ -1672,6 +1727,14 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) { } Status IrEmitterUnnested::HandleTuple(HloInstruction* tuple) { + // For the root node of the entry computation we can elide writing the tuple + // buffer. We can always figure out the contents of the tuples from buffer + // assignment because we insert copies to ensure non-ambiguous output buffers. + // GpuExecutable never reads the tuple buffer. 
+ if (tuple == + tuple->parent()->parent()->entry_computation()->root_instruction()) { + return Status::OK(); + } bool all_tuple_elements_have_buffer = absl::c_all_of(tuple->operands(), [&](HloInstruction* tuple_element) { return ir_emitter_context_->buffer_assignment() @@ -1958,6 +2021,178 @@ Status IrEmitterUnnested::HandleRng(HloInstruction* rng) { return Status::OK(); } +Status IrEmitterUnnested::HandleScatter(HloInstruction* scatter) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + + std::vector> thunks; + + // Copy the operand into the output if it's not the same buffer already. + auto operand_buffer = GetAllocationSlice(*operand); + auto destination_buffer = GetAllocationSlice(*scatter); + if (operand_buffer != destination_buffer) { + thunks.push_back(absl::make_unique( + /*source_address=*/operand_buffer, + /*destination_buffer=*/destination_buffer, + /*mem_size=*/ShapeUtil::ByteSizeOf(operand->shape()), scatter)); + } + + thunks.push_back( + BuildKernelThunk(scatter, + /*implements_whole_instruction=*/thunks.empty())); + + TF_RETURN_IF_ERROR( + EmitScatter(thunks.back().get(), scatter, + /*scatter_indices_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*scatter_indices, *scatter) + .EmitReadArrayElement(index, &b_, "scatter_index"); + }, + /*updates_gen=*/ + [=](const IrArray::Index& index) { + return GetIrArray(*updates, *scatter) + .EmitReadArrayElement(index, &b_, "update"); + })); + + // Elide the sequential thunk if there's no copy. 
+ if (thunks.size() == 1) { + thunk_sequence_->push_back(std::move(thunks[0])); + } else { + thunk_sequence_->emplace_back( + absl::make_unique(std::move(thunks), scatter)); + } + return Status::OK(); +} + +Status IrEmitterUnnested::EmitScatter( + Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen) { + const HloInstruction* operand = scatter->operand(0); + const HloInstruction* scatter_indices = scatter->operand(1); + const HloInstruction* updates = scatter->operand(2); + const ScatterDimensionNumbers& dim_numbers = + scatter->scatter_dimension_numbers(); + CHECK(ShapeUtil::Equal(scatter->shape(), operand->shape())); + + auto loop_body_emitter = [&](const IrArray::Index& index) -> Status { + std::vector raw_window_multidim; + std::vector input_scatter_multidim; + std::vector raw_window_bounds; + + // Partition the index into window indices and scatter indices. + for (int64 i = 0, e = index.size(); i != e; ++i) { + // For window indices also remember the window size, this comes in handy + // later. + if (absl::c_binary_search(dim_numbers.update_window_dims(), i)) { + raw_window_multidim.push_back(index[i]); + raw_window_bounds.push_back(updates->shape().dimensions(i)); + } else { + input_scatter_multidim.push_back(index[i]); + } + } + DCHECK_EQ(raw_window_multidim.size(), + dim_numbers.update_window_dims_size()); + + // Apply inserted_window_dims to the window dimensions. + int64 raw_window_multidim_idx = 0; + std::vector input_window_multidim; + std::vector input_window_bounds; + for (int64 i = 0, e = ShapeUtil::Rank(operand->shape()); i != e; ++i) { + if (absl::c_binary_search(dim_numbers.inserted_window_dims(), i)) { + input_window_bounds.push_back(1); // Trivial dimension. 
+ input_window_multidim.push_back(index.GetConstantWithIndexType(0)); + } else { + input_window_bounds.push_back( + raw_window_bounds[raw_window_multidim_idx]); + input_window_multidim.push_back( + raw_window_multidim[raw_window_multidim_idx]); + ++raw_window_multidim_idx; + } + } + DCHECK_EQ(input_window_multidim.size(), ShapeUtil::Rank(operand->shape())); + + // Insert a 1 dimension at the end if index_vector_dim requests one. + Shape scatter_indices_shape = scatter_indices->shape(); + if (dim_numbers.index_vector_dim() == + ShapeUtil::Rank(scatter_indices_shape)) { + scatter_indices_shape.add_dimensions(1); + scatter_indices_shape.mutable_layout()->add_minor_to_major( + dim_numbers.index_vector_dim()); + } + + // Now load the indices corresponding to the current window from + // scatter_indices. + llvm_ir::IrArray::Index raw_scatter_index_index(input_scatter_multidim, + index.GetType()); + raw_scatter_index_index.InsertAt(dim_numbers.index_vector_dim(), nullptr); + llvm::Value* is_in_bounds = b_.getTrue(); + for (int64 i = 0, e = dim_numbers.scatter_dims_to_operand_dims_size(); + i != e; ++i) { + // Our index is stored along index_vector_dim, insert that into the lookup + // index into scatter_indices. + raw_scatter_index_index[dim_numbers.index_vector_dim()] = + raw_scatter_index_index.GetConstantWithIndexType(i); + + int64 operand_dim = dim_numbers.scatter_dims_to_operand_dims(i); + TF_ASSIGN_OR_RETURN( + llvm::Value* const loaded_scatter_index, + scatter_indices_gen(raw_scatter_index_index.SourceIndexOfReshape( + scatter_indices_shape, scatter_indices->shape(), &b_))); + // And add the index to our window index. This yields the output index. + llvm::Value* casted_scatter_index = + IntCast(loaded_scatter_index, index.GetType(), + /*isSigned=*/true); + llvm::Value* dim_offset = + Add(input_window_multidim[operand_dim], casted_scatter_index); + input_window_multidim[operand_dim] = dim_offset; + + // Also do the bounds check now. 
+ int64 max_index = operand->shape().dimensions(operand_dim) - + input_window_bounds[operand_dim] + 1; + // is_in_bounds = index >= 0 && index < dim_size-window_size+1 + // --> index u< dim_size-window_size+1 + is_in_bounds = + And(is_in_bounds, ICmpULT(casted_scatter_index, + index.GetConstantWithIndexType(max_index))); + } + + llvm_ir::LlvmIfData if_window_in_bounds_data = llvm_ir::EmitIfThenElse( + is_in_bounds, "scatter.in_bounds", &b_, /*emit_else=*/false); + llvm_ir::SetToFirstInsertPoint(if_window_in_bounds_data.true_block, &b_); + // All done, now just read from the calculated input from the window, and do + // an atomic store to the calculated location in the output. + llvm_ir::IrArray::Index input_window_index(input_window_multidim, + index.GetType()); + HloInstruction* output_hlo = + scatter->IsFused() ? scatter->parent()->FusionInstruction() : scatter; + llvm::Value* output_address = + GetIrArray(*output_hlo, *output_hlo) + .EmitArrayElementAddress(input_window_index, &b_); + llvm::Value* input_address = Alloca(llvm_ir::PrimitiveTypeToIrType( + updates->shape().element_type(), module_)); + TF_ASSIGN_OR_RETURN(llvm::Value* const input_ir_value, updates_gen(index)); + Store(input_ir_value, input_address); + return EmitAtomicOperationForNestedComputation( + *scatter->to_apply(), output_address, input_address); + }; + + // Launch a kernel that reads every element in the updates tensor. We could + // also do one kernel per window instead if bounds checks turn out to be a + // bottleneck. 
+ LaunchDimensions launch_dimensions = CalculateLaunchDimensions( + updates->shape(), ir_emitter_context_->device_description()); + UpdateLaunchDimensions(launch_dimensions, thunk, + ir_emitter_context_->llvm_module()); + + return ParallelLoopEmitter(loop_body_emitter, updates->shape(), + launch_dimensions, &b_) + .EmitLoop(IrName(scatter), + GetIndexTypeForKernel(scatter, launch_dimensions.launch_bound(), + &b_)); +} + Status IrEmitterUnnested::HandleSelect(HloInstruction* select) { thunk_sequence_->push_back( BuildKernelThunk(select, /*implements_whole_instruction=*/true)); diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h index bd5db72051..93f11c069a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.h @@ -76,6 +76,7 @@ class IrEmitterUnnested : public IrEmitter { Status HandleInfeed(HloInstruction* xla_infeed) override; Status HandleOutfeed(HloInstruction* outfeed) override; Status HandleRng(HloInstruction* random) override; + Status HandleScatter(HloInstruction* scatter) override; Status HandleSelect(HloInstruction* select) override; Status HandleSort(HloInstruction* sort) override; Status HandleTupleSelect(HloInstruction* tuple_select) override; @@ -184,6 +185,14 @@ class IrEmitterUnnested : public IrEmitter { absl::Span> extra_output_gens); + // Emits code for an in-place scatter, modifying `thunk`s launch dimensions in + // the process. `scatter` may be fused, scatter indices are taken from + // `scatter_indices_gen`, updates from`updates_gen`. The output buffer is + // expected to have the operand values in it already. + Status EmitScatter(Thunk* thunk, HloInstruction* scatter, + const llvm_ir::ElementGenerator& scatter_indices_gen, + const llvm_ir::ElementGenerator& updates_gen); + // Returns true if a 0-2-1 tiling algorithm is already used to emit the kernel // for the hlo instruction. 
bool CheckAndEmitHloWithTile021(HloInstruction* hlo); diff --git a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc index b4ae2e42c7..89c5f2b128 100644 --- a/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/nvptx_compiler.cc @@ -75,7 +75,6 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/service/reduce_precision_insertion.h" #include "tensorflow/compiler/xla/service/reshape_mover.h" -#include "tensorflow/compiler/xla/service/scatter_expander.h" #include "tensorflow/compiler/xla/service/transpose_folding.h" #include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/service/while_loop_constant_sinking.h" @@ -176,8 +175,6 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec, // elimination has to come after that pass. pipeline.AddPass(); - pipeline.AddPass(); - pass.AddPass( /*is_layout_sensitive=*/false, [](const Shape&, const Shape&) { return false; }); diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index 44cde4a3d2..1f7d4205ab 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -1072,6 +1072,7 @@ bool HloDataflowAnalysis::CanShareOperandBufferWithUser( } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. 
diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 510d6360a1..d27786d160 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -2283,6 +2283,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { dataflow_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } +TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(dataflow_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(dataflow_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); diff --git a/tensorflow/compiler/xla/service/hlo_matchers.h b/tensorflow/compiler/xla/service/hlo_matchers.h index 
5502e565b6..ab901b435a 100644 --- a/tensorflow/compiler/xla/service/hlo_matchers.h +++ b/tensorflow/compiler/xla/service/hlo_matchers.h @@ -216,6 +216,7 @@ HLO_MATCHER(Remainder); HLO_MATCHER(Reshape); HLO_MATCHER(Reverse); HLO_MATCHER(Rng); +HLO_MATCHER(Scatter); HLO_MATCHER(Select); HLO_MATCHER(SelectAndScatter); HLO_MATCHER(Send); diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index 7527e35c95..93e04eb3db 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -146,7 +146,8 @@ void HloModule::ReplaceComputations( case HloOpcode::kCall: case HloOpcode::kMap: case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: { + case HloOpcode::kReduceWindow: + case HloOpcode::kScatter: { HloComputation* new_arg = tensorflow::gtl::FindWithDefault( replacements, instruction->to_apply(), nullptr); if (new_arg != nullptr) { diff --git a/tensorflow/compiler/xla/service/inliner.cc b/tensorflow/compiler/xla/service/inliner.cc index 5fd779ebf9..50c408f5bb 100644 --- a/tensorflow/compiler/xla/service/inliner.cc +++ b/tensorflow/compiler/xla/service/inliner.cc @@ -71,26 +71,23 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) { // profitability model for inlining is defined. if (hlo_query::AllOperandsAreParameters(root)) { if (root.opcode() == HloOpcode::kFusion || - root.opcode() == HloOpcode::kParameter || root.opcode() == HloOpcode::kTrace) { // Cloning not supported for these instructions. return Status::OK(); } VLOG(10) << "inlining map({X ... Y}, op) => : op(X ... Y) with function " << root.ToShortString(); - // If the input is a constant then the shape of the constant could be - // different than the map shape. Hence, a broadcast is needed, else the - // cloned operand with new shape and operands work. 
- if (root.opcode() != HloOpcode::kConstant) { - std::vector params; - for (int64 o = 0; o < root.operands().size(); o++) { - params.push_back(map->operands()[root.operand(o)->parameter_number()]); - } - HloInstruction* placed_instruction = computation_->AddInstruction( - root.CloneWithNewOperands(map->shape(), params)); + if (root.opcode() == HloOpcode::kParameter) { + // If the root is a parameter, then use the corresponding operand as the + // result of the computation. TF_RETURN_IF_ERROR( - computation_->ReplaceInstruction(map, placed_instruction)); - } else { + map->ReplaceAllUsesWith(map->operands()[root.parameter_number()])); + TF_RETURN_IF_ERROR(computation_->RemoveInstruction(map)); + } else if (root.opcode() == HloOpcode::kConstant) { + // If the input is a constant then the shape of the constant could be + // different than the map shape. Hence, a broadcast is needed, else the + // cloned operand with new shape and operands work. + // // The constant is in an embedded computation and needs to be recreated // as part of the computation that the broadcast is inserted into. 
HloInstruction* constant = computation_->AddInstruction(root.Clone()); @@ -98,6 +95,15 @@ Status InlinerVisitor::HandleMap(HloInstruction* map) { HloInstruction::CreateBroadcast(map->shape(), constant, {})); TF_RETURN_IF_ERROR( computation_->ReplaceInstruction(map, placed_instruction)); + } else { + std::vector params; + for (int64 o = 0; o < root.operands().size(); o++) { + params.push_back(map->operands()[root.operand(o)->parameter_number()]); + } + HloInstruction* placed_instruction = computation_->AddInstruction( + root.CloneWithNewOperands(map->shape(), params)); + TF_RETURN_IF_ERROR( + computation_->ReplaceInstruction(map, placed_instruction)); } changed_ = true; return Status::OK(); diff --git a/tensorflow/compiler/xla/service/inliner_test.cc b/tensorflow/compiler/xla/service/inliner_test.cc index 7e967f035c..98e0f2cfd7 100644 --- a/tensorflow/compiler/xla/service/inliner_test.cc +++ b/tensorflow/compiler/xla/service/inliner_test.cc @@ -146,6 +146,36 @@ TEST_F(InlinerTest, MapSubtractOppositeOrder) { EXPECT_TRUE(LiteralTestUtil::Equal(result, expected)); } +TEST_F(InlinerTest, MapParameter) { + Shape r0f32 = ShapeUtil::MakeShape(F32, {}); + + auto param_builder = HloComputation::Builder(TestName()); + param_builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32, "p0")); + param_builder.AddInstruction(HloInstruction::CreateParameter(1, r0f32, "p1")); + auto param_f32 = param_builder.Build(); + + auto builder = HloComputation::Builder("MapParamFunction"); + auto lhs = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(1))); + auto rhs = builder.AddInstruction( + HloInstruction::CreateConstant(LiteralUtil::CreateR0(4))); + builder.AddInstruction( + HloInstruction::CreateMap(lhs->shape(), {lhs, rhs}, param_f32.get())); + + auto computation = builder.Build(); + auto hlo_module = CreateNewVerifiedModule(); + hlo_module->AddEmbeddedComputation(std::move(param_f32)); + hlo_module->AddEntryComputation(std::move(computation)); + + 
Inliner inliner; + EXPECT_TRUE(inliner.Run(hlo_module.get()).ValueOrDie()); + EXPECT_THAT(hlo_module->entry_computation()->root_instruction(), rhs); + + // Verify execution on CPU. + auto result = ExecuteAndTransfer(hlo_module->Clone(), {}); + auto expected = LiteralUtil::CreateR0(4); + EXPECT_TRUE(LiteralTestUtil::Equal(result, expected)); +} } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 395e01fb59..9ebb603ca5 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1862,6 +1862,7 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kRemainder: case HloOpcode::kReverse: case HloOpcode::kRoundNearestAfz: + case HloOpcode::kScatter: case HloOpcode::kSelect: case HloOpcode::kSelectAndScatter: case HloOpcode::kShiftLeft: @@ -1899,7 +1900,6 @@ bool LayoutAssignment::InstructionCanChangeLayout( case HloOpcode::kReduce: case HloOpcode::kReshape: case HloOpcode::kRng: - case HloOpcode::kScatter: case HloOpcode::kSend: case HloOpcode::kSendDone: case HloOpcode::kAfterAll: diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc index 6fed7c76d0..6ef6b58e50 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis.cc @@ -771,6 +771,7 @@ bool TuplePointsToAnalysis::CanShareOperandBufferWithUser( } } if (user->opcode() == HloOpcode::kDynamicUpdateSlice || + user->opcode() == HloOpcode::kScatter || user->opcode() == HloOpcode::kWhile) { // We eliminated other users in BufferLiveness::live_range_strictly_before, // so here we just need to check that the use is at operand index 0. 
diff --git a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc index e9a07b14ed..a571bd571b 100644 --- a/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc +++ b/tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc @@ -1010,6 +1010,44 @@ TEST_F(CanShareOperandBufferWithUserTest, DynamicUpdateSliceCanShare) { points_to_analysis_->CanShareOperandBufferWithUser(starts, {}, dus, {})); } +TEST_F(CanShareOperandBufferWithUserTest, ScatterCanShare) { + const char* hlo_text = R"( + HloModule TensorFlowScatterV1 + + update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) + } + + ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + updates = s32[2,3] parameter(2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 + } + )"; + TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text)); + computation_ = module_->entry_computation(); + RunAnalysis(); + + HloInstruction* operand_param = computation_->parameter_instruction(0); + HloInstruction* indices_param = computation_->parameter_instruction(1); + HloInstruction* updates_param = computation_->parameter_instruction(2); + HloInstruction* scatter = computation_->root_instruction(); + + EXPECT_TRUE(points_to_analysis_->CanShareOperandBufferWithUser( + operand_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + indices_param, {}, scatter, {})); + EXPECT_FALSE(points_to_analysis_->CanShareOperandBufferWithUser( + updates_param, {}, scatter, {})); +} + TEST_F(CanShareOperandBufferWithUserTest, SortCanShare) { auto builder = HloComputation::Builder(TestName()); diff --git a/tensorflow/compiler/xla/tests/scatter_test.cc b/tensorflow/compiler/xla/tests/scatter_test.cc 
index b21dd56045..7e1f4aa0eb 100644 --- a/tensorflow/compiler/xla/tests/scatter_test.cc +++ b/tensorflow/compiler/xla/tests/scatter_test.cc @@ -69,6 +69,37 @@ ENTRY main { RunTest(hlo_text, &operand, &scatter_indices, &updates); } +XLA_TEST_F(ScatterTest, TensorFlowScatterV1_WithFusedAdds) { + const string hlo_text = R"( +HloModule TensorFlowScatterV1 + +update_s32 (lhs: s32[], rhs: s32[]) -> s32[] { + lhs = s32[] parameter(0) + ROOT rhs = s32[] parameter(1) +} + +ENTRY main { + p0 = s32[3,3] parameter(0) + operand = s32[3,3] add(p0, p0) + p1 = s32[2] parameter(1) + indices = s32[2] add(p1, p1) + p2 = s32[2,3] parameter(2) + updates = s32[2,3] add(p2, p2) + ROOT scatter = s32[3,3] scatter(operand, indices, updates), + to_apply=update_s32, + update_window_dims={1}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0}, + index_vector_dim=1 +} +)"; + Literal operand = + LiteralUtil::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + Literal scatter_indices = LiteralUtil::CreateR1({0, 1}); + Literal updates = LiteralUtil::CreateR2({{10, 20, 30}, {70, 80, 90}}); + RunTest(hlo_text, &operand, &scatter_indices, &updates); +} + XLA_TEST_F(ScatterTest, TensorFlowScatterV2_Update) { const char* hlo_text = R"( HloModule TensorFlowScatterV2 @@ -98,6 +129,37 @@ ENTRY main { RunTest(hlo_text, &operand, &scatter_indices, &updates); } +XLA_TEST_F(ScatterTest, SimpleR4) { + const char* hlo_text = R"( +HloModule SimpleR4 + +add_f32 (lhs: f32[], rhs: f32[]) -> f32[] { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(f32[] lhs, f32[] rhs) +} + +ENTRY main { + operand = f32[1,2,2,1] parameter(0) + indices = s32[1,3] parameter(1) + updates = f32[1,2,2,1] parameter(2) + ROOT scatter = f32[1,2,2,1] scatter(operand, indices, updates), + to_apply=add_f32, + update_window_dims={1,2,3}, + inserted_window_dims={0}, + scatter_dims_to_operand_dims={0, 2, 1}, + index_vector_dim=1 +} +)"; + + Literal operand = + LiteralUtil::CreateR4({{{{0.f}, {0.f}}, {{0.f}, 
{0.f}}}}); + Literal updates = + LiteralUtil::CreateR4({{{{0.12}, {0.28}}, {{0.018}, {0.42}}}}); + Literal scatter_indices = LiteralUtil::CreateR2({{0, 0, 0}}); + RunTest(hlo_text, &operand, &scatter_indices, &updates); +} + XLA_TEST_F(ScatterTest, TensorFlowScatter_Add) { const string hlo_text = R"( HloModule TensorFlowScatter_Add -- GitLab From e3f4d32490e9a28cba0bfa5614255dc5d517ca91 Mon Sep 17 00:00:00 2001 From: Nick Felt Date: Wed, 24 Oct 2018 18:01:36 -0700 Subject: [PATCH 0099/1078] Update tensorboard dependency to 1.12.x (#23230) Also updated tb-nightly to +1 minor version, 1.13.x. PiperOrigin-RevId: 218582588 --- tensorflow/tools/pip_package/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index b7eed56695..ceaa96b690 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -56,7 +56,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.13.3', 'six >= 1.10.0', 'protobuf >= 3.6.1', - 'tensorboard >= 1.11.0, < 1.12.0', + 'tensorboard >= 1.12.0, < 1.13.0', 'termcolor >= 1.1.0', ] @@ -85,7 +85,7 @@ else: if 'tf_nightly' in project_name: for i, pkg in enumerate(REQUIRED_PACKAGES): if 'tensorboard' in pkg: - REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.12.0a0, < 1.13.0a0' + REQUIRED_PACKAGES[i] = 'tb-nightly >= 1.13.0a0, < 1.14.0a0' break # weakref.finalize and enum were introduced in Python 3.4 -- GitLab From 43ec5a3d6ee49eadc98835d1ab18c62cafa5043d Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Wed, 24 Oct 2018 18:29:31 -0700 Subject: [PATCH 0100/1078] Fix string comparison (#23237) PiperOrigin-RevId: 218607372 --- tensorflow/tools/ci_build/builds/configured | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/builds/configured b/tensorflow/tools/ci_build/builds/configured index 3eee11fd7e..f8a9311918 100755 --- a/tensorflow/tools/ci_build/builds/configured +++ 
b/tensorflow/tools/ci_build/builds/configured @@ -33,7 +33,7 @@ COMMAND=("$@") export CI_BUILD_PYTHON="${CI_BUILD_PYTHON:-python}" export PYTHON_BIN_PATH="${PYTHON_BIN_PATH:-$(which ${CI_BUILD_PYTHON})}" # XLA currently does not build under Android, so disable it for now. -if [[ "${CONTAINER_TYPE}" -eq 'android' ]]; then +if [[ "${CONTAINER_TYPE}" == 'android' ]]; then export TF_ENABLE_XLA=0 fi -- GitLab From dd9ebe12df7906a3211b8db2d21fa73c4504d118 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Thu, 25 Oct 2018 11:03:59 +0800 Subject: [PATCH 0101/1078] fix softmax dims error Change-Id: I3303f368053a691787a0922098ee75e3b0c26219 Conflicts: tensorflow/core/kernels/mkl_softmax_op.cc --- tensorflow/core/kernels/mkl_softmax_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index cfab529662..92167e06d5 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -50,8 +50,8 @@ class MklSoftmaxOp : public OpKernel { // src_tensor now points to the 0-th input of global data struct "context" size_t src_idx = 0; const Tensor& src_tensor = MklGetInput(context, src_idx); - const int input_dims = src_tensor.dims(); - + //const int input_dims = src_tensor.dims(); + // printf("input_dims = %d\n", input_dims); // Add: get MklShape MklDnnShape src_mkl_shape; GetMklShape(context, src_idx, &src_mkl_shape); @@ -61,6 +61,7 @@ class MklSoftmaxOp : public OpKernel { auto src_tf_shape = src_mkl_shape.IsMklTensor() ? src_mkl_shape.GetTfShape() : src_tensor.shape(); + const int input_dims = src_tf_shape.dims(); auto src_dims = TFShapeToMklDnnDims(src_tf_shape); auto output_dims = src_dims; memory::format layout_type; -- GitLab From 76fd3b394bb170437baee31516967f4d583869be Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Thu, 25 Oct 2018 13:32:54 +0800 Subject: [PATCH 0102/1078] Fix clang-format issues. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 242 +++++++++++------------ 1 file changed, 113 insertions(+), 129 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 233c5ab39b..e041ab14ca 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -22,10 +22,10 @@ limitations under the License. #include #include #include +#include #include #include #include -#include #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/optimization_registry.h" @@ -513,15 +513,19 @@ class MklLayoutRewritePass : public GraphOptimizationPass { GetConv2DBackpropFilterOrBiasAddGrad}); // - // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D (NHWC) + Transpose (NHWC-> + // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D + // (NHWC) + Transpose (NHWC-> // NCHW) " => "Conv2D (NCHW). Such patterns occur frequently in Keras. - // Note: we use the term "merge" is to combine (exactly) 2 nodes into one, while "fusion" is + // Note: we use the term "merge" is to combine (exactly) 2 nodes into one, + // while "fusion" is // for 3+ nodes situation. 
// // Transpose + Conv2d + Transpose: - std::vector transpose_to_nhwc = { NCHW::dim::N, NCHW::dim::H, NCHW::dim::W, NCHW::dim::C }; - std::vector transpose_to_nchw = { NHWC::dim::N, NHWC::dim::C, NHWC::dim::H, NHWC::dim::W }; + std::vector transpose_to_nhwc = {NCHW::dim::N, NCHW::dim::H, + NCHW::dim::W, NCHW::dim::C}; + std::vector transpose_to_nchw = {NHWC::dim::N, NHWC::dim::C, + NHWC::dim::H, NHWC::dim::W}; auto CheckForTransposeToNHWC = std::bind(CheckForTranspose, std::placeholders::_1, transpose_to_nhwc); auto CheckForConv2dOp = @@ -531,13 +535,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { auto FuseConv2D = std::bind(FuseTransposeMklOpTranspose, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, "NCHW"); - finfo_.push_back({ - "transpose-elimination for Conv2D", { - CheckForTransposeToNHWC, CheckForConv2dOp, CheckForTransposeToNCHW - }, - // CheckForMklOp - FuseConv2D, CopyAttrsConv - }); + finfo_.push_back( + {"transpose-elimination for Conv2D", + {CheckForTransposeToNHWC, CheckForConv2dOp, CheckForTransposeToNCHW}, + // CheckForMklOp + FuseConv2D, + CopyAttrsConv}); } // Standard interface to run pass @@ -592,20 +595,20 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // structure to specify information used in node fusion of 2+ operators typedef struct { - std::string pattern_name; // name to describe this pattern, such as - // "Transpose_Mklop_Transpose". - std::vector > - node_checkers; // extra restriction checker for these ops - std::function< - Status(std::unique_ptr *, std::vector &, - std::function)> + std::string pattern_name; // name to describe this pattern, such as + // "Transpose_Mklop_Transpose". + std::vector > + node_checkers; // extra restriction checker for these ops + std::function*, std::vector&, + std::function)> fuse_func; - std::function copy_attrs; + std::function copy_attrs; } FusionInfo; // // dimension indices for 2D tensor. 
- // + // struct NCHW { enum dim { N = 0, C = 1, H = 2, W = 3 }; }; @@ -614,7 +617,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { enum dim { N = 0, H = 1, W = 2, C = 3 }; }; - // // dimension indices for 3D tensor. // @@ -889,30 +891,29 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // // @return tuple. If we can find such nodes, the first // element of the tuple is a true. Otherwise, it's false. - std::tuple, const MklLayoutRewritePass::FusionInfo> - CheckForNodeFusion(Node *n) const; + std::tuple, const MklLayoutRewritePass::FusionInfo> + CheckForNodeFusion(Node* n) const; // Fuse nodes in the vector "nodes" - Status FuseNode(std::unique_ptr *g, std::vector &nodes, + Status FuseNode(std::unique_ptr* g, std::vector& nodes, const MklLayoutRewritePass::FusionInfo fi); static Status FuseTransposeMklOpTranspose( - std::unique_ptr *g, std::vector &nodes, - std::function copy_attrs, + std::unique_ptr* g, std::vector& nodes, + std::function copy_attrs, string data_format); - static bool CheckForTranspose(const Node *node, std::vector perm) { + static bool CheckForTranspose(const Node* node, std::vector perm) { // // Check node node, to see if it's "Transpose" // - if (node->type_string() != "Transpose") - return false; + if (node->type_string() != "Transpose") return false; // // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. // - for (const Edge *e : node->out_edges()) { + for (const Edge* e : node->out_edges()) { if (e->IsControlEdge()) { return false; } @@ -921,7 +922,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // // If "Transpose" has input control edges, don't fuse on it. 
// - for (const Edge *e : node->in_edges()) { + for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) { return false; } @@ -930,19 +931,19 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // // If "Transpose" has multiple output data edges, also don't fuse it. // - if (node->num_outputs() > 1 || node->out_edges().size() > 1) - return false; + if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; // Check "perm" attribute, make sure it's what we want. // - for (const Edge *e : node->in_edges()) { + for (const Edge* e : node->in_edges()) { if (!e->IsControlEdge()) { - const Node *perm_node = e->src(); + const Node* perm_node = e->src(); const int kPermTensorIndex = 1; - if (perm_node->type_string() == "Const" && e->dst_input() == kPermTensorIndex) { + if (perm_node->type_string() == "Const" && + e->dst_input() == kPermTensorIndex) { // we find the "perm" node, now try to retrieve its value. - const TensorProto *proto = nullptr; + const TensorProto* proto = nullptr; CHECK_EQ(GetNodeAttr(perm_node->def(), "value", &proto).ok(), true); DataType type; @@ -955,23 +956,28 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // if (type == DT_INT32) { const int type_size = 4; - const int *tensor_content = reinterpret_cast(proto->tensor_content().c_str()); - const int tensor_content_size = proto->tensor_content().size() / type_size; - - std::vector perm_value(tensor_content, tensor_content + tensor_content_size); + const int* tensor_content = + reinterpret_cast(proto->tensor_content().c_str()); + const int tensor_content_size = + proto->tensor_content().size() / type_size; + + std::vector perm_value(tensor_content, + tensor_content + tensor_content_size); return perm_value == perm; } else if (type == DT_INT64) { const int type_size = 8; - const long *tensor_content = reinterpret_cast(proto->tensor_content().c_str()); - const int tensor_content_size = proto->tensor_content().size() / type_size; - - std::vector 
perm_value(tensor_content, tensor_content + tensor_content_size); + const long* tensor_content = + reinterpret_cast(proto->tensor_content().c_str()); + const int tensor_content_size = + proto->tensor_content().size() / type_size; + + std::vector perm_value(tensor_content, + tensor_content + tensor_content_size); std::vector long_perm(perm.cbegin(), perm.cend()); return perm_value == long_perm; - } return false; @@ -982,17 +988,15 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return false; } - static bool CheckForMklOp(const Node *node, string name = "") { + static bool CheckForMklOp(const Node* node, string name = "") { if (!name.empty() && node->type_string() != name) { return false; } // if mklop has multiple outputs, don't fuse it. - if (node->num_outputs() > 1) - return false; + if (node->num_outputs() > 1) return false; - if (node->out_edges().size() > 1) - return false; + if (node->out_edges().size() > 1) return false; DataType T; TF_CHECK_OK(GetNodeAttr(node->def(), "T", &T)); @@ -1255,23 +1259,23 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // We need operator-specific function to copy attributes because the framework // does not provide any generic function for it. // NOTE: names are alphabetically sorted. 
- static void CopyAttrsAddN(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsBiasAddGrad(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsBiasAddGrad(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsConcat(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsConcat(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsConcatV2(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsConcatV2(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsConv(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsDataType(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsFusedBatchNorm(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsLRN(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsPooling(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb, bool change_format = false); static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb, bool change_format = false); @@ -1279,16 +1283,15 @@ class MklLayoutRewritePass : public GraphOptimizationPass { bool change_format = false); static void CopyAttrsQuantizedConcat(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsReshape(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsReshape(const Node* orig_node, 
NodeBuilder* nb, bool change_format = false); static void CopyAttrsRequantize(const Node* orig_node, NodeBuilder* nb, bool change_format = false); static void CopyAttrsSlice(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - static void CopyAttrsSplit(const Node *orig_node, NodeBuilder *nb, + static void CopyAttrsSplit(const Node* orig_node, NodeBuilder* nb, bool change_format = false); - // Generate a graph node in graph 'g' representing a dummy Mkl tensor node, // using node for original node 'orig_node' and return it in '*out'. // TODO(nhasabni) We should move this to mkl_util.h @@ -1788,7 +1791,7 @@ void MklLayoutRewritePass::AddWorkSpaceEdgeIfNeeded( // Op-specific functions to copy attributes from old node to new node ////////////////////////////////////////////////////////////////////////// -void MklLayoutRewritePass::CopyAttrsConv(const Node *orig_node, NodeBuilder *nb, +void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, bool change_format) { DataType T; string data_format; @@ -1817,55 +1820,40 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node *orig_node, NodeBuilder *nb, std::vector new_dilations; if (strides.size() == 5) { // - // "strides" and "dilations" also need to be changed according to "data_format", + // "strides" and "dilations" also need to be changed according to + // "data_format", // in this case, is "NDHWC" to "NCDHW". 
// - new_strides = { - strides[NDHWC::dim::N], - strides[NDHWC::dim::C], - strides[NDHWC::dim::D], - strides[NDHWC::dim::H], - strides[NDHWC::dim::W] - }; + new_strides = {strides[NDHWC::dim::N], strides[NDHWC::dim::C], + strides[NDHWC::dim::D], strides[NDHWC::dim::H], + strides[NDHWC::dim::W]}; nb->Attr("strides", new_strides); - new_dilations = { - dilations[NDHWC::dim::N], - dilations[NDHWC::dim::C], - dilations[NDHWC::dim::D], - dilations[NDHWC::dim::H], - dilations[NDHWC::dim::W] - }; + new_dilations = {dilations[NDHWC::dim::N], dilations[NDHWC::dim::C], + dilations[NDHWC::dim::D], dilations[NDHWC::dim::H], + dilations[NDHWC::dim::W]}; nb->Attr("dilations", new_dilations); } else { // - // "strides" and "dilations" also need to be changed according to "data_format", + // "strides" and "dilations" also need to be changed according to + // "data_format", // in this case, is "NHWC" to "NCHW". // - - new_strides = { - strides[NHWC::dim::N], - strides[NHWC::dim::C], - strides[NHWC::dim::H], - strides[NHWC::dim::W] - }; + + new_strides = {strides[NHWC::dim::N], strides[NHWC::dim::C], + strides[NHWC::dim::H], strides[NHWC::dim::W]}; nb->Attr("strides", new_strides); - new_dilations = { - dilations[NHWC::dim::N], - dilations[NHWC::dim::C], - dilations[NHWC::dim::H], - dilations[NHWC::dim::W] - }; + new_dilations = {dilations[NHWC::dim::N], dilations[NHWC::dim::C], + dilations[NHWC::dim::H], dilations[NHWC::dim::W]}; nb->Attr("dilations", new_dilations); } } } -void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, - NodeBuilder* nb, +void MklLayoutRewritePass::CopyAttrsAddN(const Node* orig_node, NodeBuilder* nb, bool change_format) { DataType T; int N; @@ -1897,8 +1885,7 @@ void MklLayoutRewritePass::CopyAttrsBiasAddGrad(const Node* orig_node, nb->Attr("data_format", data_format); } -void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, - NodeBuilder* nb, +void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb, bool 
change_format) { DataType T; int depth_radius; @@ -2037,8 +2024,7 @@ void MklLayoutRewritePass::CopyAttrsReshape(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node, - NodeBuilder* nb, - bool change_format) { + NodeBuilder* nb, bool change_format) { DataType T; DataType Index; @@ -2051,8 +2037,7 @@ void MklLayoutRewritePass::CopyAttrsSlice(const Node* orig_node, } void MklLayoutRewritePass::CopyAttrsSplit(const Node* orig_node, - NodeBuilder* nb, - bool change_format) { + NodeBuilder* nb, bool change_format) { DataType T; string data_format; int num_split; @@ -2665,28 +2650,28 @@ MklLayoutRewritePass::CheckForNodeRewrite(const Node* n) const { // Helper functions for node fusion ////////////////////////////////////////////////////////////////////////// Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( - std::unique_ptr *g, std::vector &nodes, - std::function copy_attrs, + std::unique_ptr* g, std::vector& nodes, + std::function copy_attrs, string data_format) { - Node *transpose_to_nhwc = nodes[0]; - Node *mklop = nodes[1]; - Node *transpose_to_nchw = nodes[2]; + Node* transpose_to_nhwc = nodes[0]; + Node* mklop = nodes[1]; + Node* transpose_to_nchw = nodes[2]; const int transpose_nhwc_num_inputs = transpose_to_nhwc->num_inputs(); - gtl::InlinedVector transpose_nhwc_control_edges; - gtl::InlinedVector, 4> transpose_nhwc_in( + gtl::InlinedVector transpose_nhwc_control_edges; + gtl::InlinedVector, 4> transpose_nhwc_in( transpose_nhwc_num_inputs); FillInputs(transpose_to_nhwc, &transpose_nhwc_control_edges, &transpose_nhwc_in); const int mklop_num_inputs = mklop->num_inputs(); - gtl::InlinedVector mklop_control_edges; - gtl::InlinedVector, 4> mklop_in(mklop_num_inputs); + gtl::InlinedVector mklop_control_edges; + gtl::InlinedVector, 4> mklop_in(mklop_num_inputs); FillInputs(mklop, &mklop_control_edges, &mklop_in); const int transpose_nchw_num_inputs = transpose_to_nchw->num_inputs(); - gtl::InlinedVector 
transpose_nchw_control_edges; - gtl::InlinedVector, 4> transpose_nchw_in( + gtl::InlinedVector transpose_nchw_control_edges; + gtl::InlinedVector, 4> transpose_nchw_in( transpose_nchw_num_inputs); FillInputs(transpose_to_nhwc, &transpose_nchw_control_edges, &transpose_nchw_in); @@ -2706,19 +2691,19 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( } } - copy_attrs(const_cast(mklop), &nb, true); + copy_attrs(const_cast(mklop), &nb, true); nb.Attr("data_format", data_format); // Copy the device assigned to old node to new node. nb.Device(mklop->def().device()); // Create node. - Node *new_node; + Node* new_node; TF_CHECK_OK(nb.Finalize(&**g, &new_node)); CHECK_NOTNULL(new_node); // Fill outputs. - for (const Edge *e : transpose_to_nchw->out_edges()) { + for (const Edge* e : transpose_to_nchw->out_edges()) { if (!e->IsControlEdge()) { const int kConv2DWithBiasOutputSlot = 0; CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot, e->dst(), @@ -2736,18 +2721,17 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( return Status::OK(); } -Status -MklLayoutRewritePass::FuseNode(std::unique_ptr *g, - std::vector &nodes, - const MklLayoutRewritePass::FusionInfo fi) { +Status MklLayoutRewritePass::FuseNode( + std::unique_ptr* g, std::vector& nodes, + const MklLayoutRewritePass::FusionInfo fi) { return fi.fuse_func(g, nodes, fi.copy_attrs); } -std::tuple, const MklLayoutRewritePass::FusionInfo> -MklLayoutRewritePass::CheckForNodeFusion(Node *a) const { +std::tuple, const MklLayoutRewritePass::FusionInfo> +MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { bool found_pattern = false; - std::vector nodes; - const FusionInfo *fi_ptr = nullptr; + std::vector nodes; + const FusionInfo* fi_ptr = nullptr; for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { assert(fi->ops.size() == fi->node_checkers.size()); @@ -2760,7 +2744,7 @@ MklLayoutRewritePass::CheckForNodeFusion(Node *a) const { // // Initialize "current_node" as node "a". 
- Node *current_node = a; + Node* current_node = a; for (auto node_index = 0; node_index < fi->node_checkers.size(); ++node_index) { // Make sure current node meet the requirement of corresponding node @@ -2781,9 +2765,9 @@ MklLayoutRewritePass::CheckForNodeFusion(Node *a) const { // Find current node's direct descendant, which will be used in next // iteration. auto check_next_node = fi->node_checkers[node_index + 1]; - for (const Edge *e : current_node->out_edges()) { + for (const Edge* e : current_node->out_edges()) { if (!e->IsControlEdge()) { - Node *candidate_node = e->dst(); + Node* candidate_node = e->dst(); if (check_next_node(candidate_node) == false) { current_node = nullptr; @@ -2933,7 +2917,7 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { order.clear(); GetReversePostOrder(**g, &order); // This will give us topological sort. - for (Node *n : order) { + for (Node* n : order) { // If node is not an op or it cannot run on CPU device, then skip. if (!n->IsOp() || !CanOpRunOnCPUDevice(n)) { continue; @@ -2941,7 +2925,7 @@ bool MklLayoutRewritePass::RunPass(std::unique_ptr* g) { auto check_result = CheckForNodeFusion(n); bool found_pattern = std::get<0>(check_result); - std::vector nodes = std::get<1>(check_result); + std::vector nodes = std::get<1>(check_result); const FusionInfo fi = std::get<2>(check_result); // if "found_pattern" is true, we can do the fusion. -- GitLab From 8d068a857dfbeb5cd417eb342a6ba7c330e0a270 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Thu, 25 Oct 2018 13:46:07 +0800 Subject: [PATCH 0103/1078] Fxi clang-format issues. 
--- tensorflow/core/graph/mkl_layout_pass_test.cc | 238 +++++++++--------- 1 file changed, 126 insertions(+), 112 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index b09ef3b970..f4f2ab2a97 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -457,53 +457,57 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) { TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Positive) { InitGraph( - "node { name: 'Input0' op: 'Input'}" - "node { name: 'Input1' op: 'Input'}" - "node { name: 'Const0' op: 'Const'" - " attr {" - " key: 'dtype'" - " value {" - " type: DT_INT32" - " }" - " }" - " attr {" - " key: 'value'" - " value {" - " tensor {" - " dtype: DT_INT32" - " tensor_shape {" - " dim {" - " size: 4" - " }" - " }" - " tensor_content: '\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" - " }" - " }" - " }" - "}" - "node { name: 'Const1' op: 'Const'" - " attr {" - " key: 'dtype'" - " value {" - " type: DT_INT32" - " }" - " }" - " attr {" - " key: 'value'" - " value {" - " tensor {" - " dtype: DT_INT32" - " tensor_shape {" - " dim {" - " size: 4" - " }" - " }" - " tensor_content: '\\000\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000\\002\\000\\000\\000'" - " }" - " }" - " }" - "}" - "node { \ + "node { name: 'Input0' op: 'Input'}" + "node { name: 'Input1' op: 'Input'}" + "node { name: 'Const0' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: " + "'\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000" + "\\000\\000'" + " }" + " }" + " }" + "}" + "node { name: 'Const1' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + 
" attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: " + "'\\000\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000\\002\\000" + "\\000\\000'" + " }" + " }" + " }" + "}" + "node { \ name: 'Transpose0' \ op: 'Transpose' \ input: 'Input0' \ @@ -520,8 +524,8 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Positive) { type: DT_INT32 \ } \ } \ - }" - "node { \ + }" + "node { \ name: 'Conv2D' \ op: 'Conv2D' \ input: 'Transpose0' \ @@ -573,7 +577,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Positive) { } \ } \ }" - "node { \ + "node { \ name: 'Transpose1' \ op: 'Transpose' \ input: 'Conv2D' \ @@ -591,65 +595,71 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Positive) { } \ } \ }" - "node { name: 'Relu' op: 'Relu'" + "node { name: 'Relu' op: 'Relu'" " attr { key: 'T' value { type: DT_FLOAT } }" " input: ['Transpose1'] }"); EXPECT_EQ(DoMklLayoutOptimizationPass(), "Const0(Const);Const1(Const);" "Conv2D(_MklConv2D);DMT/_0(Const);DMT/_1(Const);Input0(Input);" - "Input1(Input);Relu(_MklRelu)|Conv2D->Relu;Conv2D:2->Relu:1;DMT/_0->Conv2D:2;DMT/_1->Conv2D:3;Input0->Conv2D;" - "Input0:control->DMT/_0:control;Input0:control->DMT/_1:control;Input1->Conv2D:1"); + "Input1(Input);Relu(_MklRelu)|Conv2D->Relu;Conv2D:2->Relu:1;DMT/" + "_0->Conv2D:2;DMT/_1->Conv2D:3;Input0->Conv2D;" + "Input0:control->DMT/_0:control;Input0:control->DMT/" + "_1:control;Input1->Conv2D:1"); } TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Negative) { InitGraph( - "node { name: 'Input0' op: 'Input'}" - "node { name: 'Input1' op: 'Input'}" - "node { name: 'Const0' op: 'Const'" - " attr {" - " key: 'dtype'" - " value {" - " type: DT_INT32" - " }" - " }" - " attr {" - " key: 'value'" - " value {" - " tensor {" - " dtype: DT_INT32" - " tensor_shape {" - " dim {" - " size: 4" - " }" - " }" - " tensor_content: 
'\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" - " }" - " }" - " }" - "}" - "node { name: 'Const1' op: 'Const'" - " attr {" - " key: 'dtype'" - " value {" - " type: DT_INT32" - " }" - " }" - " attr {" - " key: 'value'" - " value {" - " tensor {" - " dtype: DT_INT32" - " tensor_shape {" - " dim {" - " size: 4" - " }" - " }" - " tensor_content: '\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000\\000\\000'" - " }" - " }" - " }" - "}" - "node { \ + "node { name: 'Input0' op: 'Input'}" + "node { name: 'Input1' op: 'Input'}" + "node { name: 'Const0' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: " + "'\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000" + "\\000\\000'" + " }" + " }" + " }" + "}" + "node { name: 'Const1' op: 'Const'" + " attr {" + " key: 'dtype'" + " value {" + " type: DT_INT32" + " }" + " }" + " attr {" + " key: 'value'" + " value {" + " tensor {" + " dtype: DT_INT32" + " tensor_shape {" + " dim {" + " size: 4" + " }" + " }" + " tensor_content: " + "'\\000\\000\\000\\000\\002\\000\\000\\000\\003\\000\\000\\000\\001\\000" + "\\000\\000'" + " }" + " }" + " }" + "}" + "node { \ name: 'Transpose0' \ op: 'Transpose' \ input: 'Input0' \ @@ -666,8 +676,8 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Negative) { type: DT_INT32 \ } \ } \ - }" - "node { \ + }" + "node { \ name: 'Conv2D' \ op: 'Conv2D' \ input: 'Transpose0' \ @@ -719,7 +729,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Negative) { } \ } \ }" - "node { \ + "node { \ name: 'Transpose1' \ op: 'Transpose' \ input: 'Conv2D' \ @@ -737,17 +747,21 @@ TEST_F(MklLayoutPassTest, NodeMerge_TransposeConv2DTranspose_Negative) { } \ } \ }" - "node { name: 'Relu' op: 'Relu'" + "node { name: 'Relu' op: 
'Relu'" " attr { key: 'T' value { type: DT_FLOAT } }" " input: ['Transpose1'] }"); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "Const0(Const);Const1(Const);" - "Conv2D(_MklConv2D);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);" - "Input0(Input);Input1(Input);Relu(_MklRelu);" - "Transpose0(Transpose);Transpose1(Transpose)|Const0->Transpose0:1;Const1->Transpose1:1;" - "Conv2D->Transpose1;DMT/_0->Conv2D:2;DMT/_1->Conv2D:3;DMT/_2->Relu:1;Input0->Transpose0;" - "Input1->Conv2D:1;Transpose0->Conv2D;Transpose0:control->DMT/_0:control;" - "Transpose0:control->DMT/_1:control;Transpose1->Relu;Transpose1:control->DMT/_2:control"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "Const0(Const);Const1(Const);" + "Conv2D(_MklConv2D);DMT/_0(Const);DMT/_1(Const);DMT/_2(Const);" + "Input0(Input);Input1(Input);Relu(_MklRelu);" + "Transpose0(Transpose);Transpose1(Transpose)|Const0->Transpose0:1;Const1-" + ">Transpose1:1;" + "Conv2D->Transpose1;DMT/_0->Conv2D:2;DMT/_1->Conv2D:3;DMT/" + "_2->Relu:1;Input0->Transpose0;" + "Input1->Conv2D:1;Transpose0->Conv2D;Transpose0:control->DMT/_0:control;" + "Transpose0:control->DMT/" + "_1:control;Transpose1->Relu;Transpose1:control->DMT/_2:control"); } ///////////////////////////////////////////////////////////////////// -- GitLab From ec31b13690118d1998824ba4d350fcbc22fbfb60 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 09:22:14 -0700 Subject: [PATCH 0104/1078] Explicitly quote every command piece. 
(#23259) PiperOrigin-RevId: 218399942 --- third_party/repo.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/repo.bzl b/third_party/repo.bzl index 6e30618d39..391622e237 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -26,7 +26,7 @@ def _wrap_bash_cmd(ctx, cmd): bazel_sh = _get_env_var(ctx, "BAZEL_SH") if not bazel_sh: fail("BAZEL_SH environment variable is not set") - cmd = [bazel_sh, "-l", "-c", " ".join(cmd)] + cmd = [bazel_sh, "-l", "-c", " ".join(["\"%s\"" % s for s in cmd])] return cmd def _get_env_var(ctx, name): -- GitLab From a315296d577b09eca88fe1a6cd36a13502d72067 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 10:44:18 -0700 Subject: [PATCH 0105/1078] Don't set TF_PER_DEVICE_MEMORY_LIMIT_MB as a --test_env if it isn't specified. (#23258) PiperOrigin-RevId: 218634344 --- .../tools/ci_build/ci_parameterized_build.sh | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index bc9cb4e9a1..435ec7ca68 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -128,8 +128,9 @@ NO_DOCKER_OPT_FLAG="--genrule_strategy=standalone" DO_DOCKER=1 -# Bazel uses defaults for all test sizes when given `-1`. -TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1} +# Default values for various settings. 
+TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1} # Use bazel defaults +TF_GPU_COUNT=${TF_GPU_COUNT:-4} # Helpful flags: # --test_summary=detailed: Tell us more about which targets are being built @@ -144,9 +145,20 @@ TF_BUILD_TEST_TIMEOUT=${TF_BUILD_TEST_TIMEOUT:--1} BAZEL_TEST_FLAGS=""\ "--test_summary=detailed --build_tests_only --keep_going "\ "--test_timeout=${TF_BUILD_TEST_TIMEOUT} "\ -"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT} "\ -"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU} "\ +"--test_env=TF_GPU_COUNT=${TF_GPU_COUNT}" + +# Only set these environment variables if they're specified, to avoid causing +# problems like b/118404869, where an envvar set to the empty string has +# different semantics from an unset envvar. +if [ -n "${TF_TESTS_PER_GPU}" ]; then + BAZEL_TEST_FLAGS="${BAZEL_TEST_FLAGS} "\ +"--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU}" +fi +if [ -n "${TF_PER_DEVICE_MEMORY_LIMIT_MB}" ]; then + BAZEL_TEST_FLAGS="${BAZEL_TEST_FLAGS} "\ "--test_env=TF_PER_DEVICE_MEMORY_LIMIT_MB=${TF_PER_DEVICE_MEMORY_LIMIT_MB}" +fi + BAZEL_BUILD_FLAGS="--keep_going" # Explicitly set jdk8 since that's what's installed in our images. Note that @@ -163,7 +175,6 @@ PIP_INTEGRATION_TESTS_FLAG="--integration_tests" ANDROID_CMD="${CI_BUILD_DIR}/builds/android.sh" ANDROID_FULL_CMD="${CI_BUILD_DIR}/builds/android_full.sh" -TF_GPU_COUNT=${TF_GPU_COUNT:-4} PARALLEL_GPU_TEST_CMD='//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute' BENCHMARK_CMD="${CI_BUILD_DIR}/builds/benchmark.sh" -- GitLab From 37a2e36733b0f12102133e8ff5fb516573bdf7ec Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 12:49:30 -0700 Subject: [PATCH 0106/1078] Upgrade setuptools before installing absl-py in remaining scripts. 
(#23264) PiperOrigin-RevId: 218730741 --- .../ci_build/install/install_python3.5_pip_packages.sh | 10 ++++------ .../ci_build/install/install_python3.6_pip_packages.sh | 6 ++++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh index 61d4fe3fe8..62e04df717 100755 --- a/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.5_pip_packages.sh @@ -41,6 +41,10 @@ fi set -e pip3.5 install --upgrade pip +# Install last working version of setuptools. This must happen before we install +# absl-py, which uses install_requires notation introduced in setuptools 20.5. +pip3.5 install --upgrade setuptools==39.1.0 + pip3.5 install --upgrade virtualenv # Install six. @@ -81,15 +85,9 @@ pip3.5 install --upgrade astor pip3.5 install --upgrade gast pip3.5 install --upgrade termcolor -# Install last working version of setuptools. -pip3.5 install --upgrade setuptools==39.1.0 - # Keras pip3.5 install keras_applications==1.0.6 pip3.5 install keras_preprocessing==1.0.5 pip3.5 install --upgrade h5py==2.8.0 -# Install last working version of setuptools. -pip3.5 install --upgrade setuptools==39.1.0 - # LINT.ThenChange(//tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh) diff --git a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh index 8949af8a88..48d556b1dd 100755 --- a/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_python3.6_pip_packages.sh @@ -51,6 +51,10 @@ ln -s /usr/local/bin/pip3.6 /usr/local/bin/pip3 pip3 install --upgrade pip +# Install last working version of setuptools. This must happen before we install +# absl-py, which uses install_requires notation introduced in setuptools 20.5. 
+pip3 install --upgrade setuptools==39.1.0 + pip3 install --upgrade virtualenv set -e @@ -97,8 +101,6 @@ pip3 install --upgrade astor pip3 install --upgrade gast pip3 install --upgrade termcolor -# Install last working version of setuptools. -pip3 install --upgrade setuptools==39.1.0 pip3 install --upgrade h5py==2.8.0 # Keras -- GitLab From 405b34608005bc17c50dbbe915e4d68a694274ca Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 13:56:08 -0700 Subject: [PATCH 0107/1078] Fp16 LSTMBlocKCell and LSTMBlockFusedCell (#23267) PiperOrigin-RevId: 216632480 --- tensorflow/contrib/rnn/kernels/blas_gemm.cc | 7 +- tensorflow/contrib/rnn/kernels/blas_gemm.h | 9 +- tensorflow/contrib/rnn/kernels/lstm_ops.cc | 163 +++++++++--------- tensorflow/contrib/rnn/kernels/lstm_ops.h | 34 ++-- .../contrib/rnn/kernels/lstm_ops_gpu.cu.cc | 80 +++++++-- tensorflow/contrib/rnn/ops/lstm_ops.cc | 8 +- tensorflow/contrib/rnn/python/ops/lstm_ops.py | 5 +- 7 files changed, 185 insertions(+), 121 deletions(-) diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc index 45d22b739b..56ec86418d 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc @@ -38,8 +38,9 @@ namespace functor { template void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, uint64 n, uint64 k, - T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc) { + float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* c, + int ldc) { #if GOOGLE_CUDA se::blas::Transpose trans[] = {se::blas::Transpose::kNoTranspose, se::blas::Transpose::kTranspose}; @@ -60,8 +61,8 @@ void TensorCuBlasGemm::operator()(OpKernelContext* ctx, bool transa, #endif } +template struct TensorCuBlasGemm; template struct TensorCuBlasGemm; -template struct TensorCuBlasGemm; } // end namespace functor } // end namespace tensorflow diff --git 
a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h index a52c934233..9535a76566 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.h +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.h @@ -28,8 +28,8 @@ namespace functor { template struct TensorCuBlasGemm { void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, - uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b, - int ldb, T beta, T* c, int ldc); + uint64 n, uint64 k, float alpha, const T* a, int lda, + const T* b, int ldb, float beta, T* c, int ldc); }; template @@ -38,8 +38,9 @@ struct TensorBlasGemm; template struct TensorBlasGemm { static void compute(OpKernelContext* ctx, const Device& d, bool transa, - bool transb, T alpha, typename TTypes::ConstMatrix a, - typename TTypes::ConstMatrix b, T beta, + bool transb, float alpha, + typename TTypes::ConstMatrix a, + typename TTypes::ConstMatrix b, float beta, typename TTypes::Matrix c) { int64 m = c.dimensions()[0]; int64 n = c.dimensions()[1]; diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc index 5e7cf0ce84..ee08d306f8 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc @@ -44,7 +44,7 @@ namespace functor { template void LSTMBlockCellFpropWithEigen( const LSTMBlockCell& cell, OpKernelContext* ctx, const CPUDevice& d, - const T forget_bias, const T cell_clip, bool use_peephole, + const float forget_bias, const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -177,50 +177,51 @@ void LSTMBlockCellBpropWithEigen( } } -#define DEFINE_CPU_SPECS(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, 
typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithEigen( \ - *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ - h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ - } \ - template <> \ - void LSTMBlockCellBprop::operator()( \ - OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ - typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ - typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ - typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ - typename TTypes::ConstMatrix co, \ - typename TTypes::ConstMatrix cs_grad, \ - typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ - typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ - typename TTypes::Matrix df, typename TTypes::Matrix di, \ - typename TTypes::Matrix dicfo, \ - typename TTypes::Matrix cs_prev_grad, \ - typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ - typename TTypes::Vec wco_grad) { \ - LSTMBlockCellBpropWithEigen( \ - *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ - i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ - cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ - } \ - template 
struct LSTMBlockCellFprop; \ +#define DEFINE_CPU_SPECS(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ + LSTMBlockCellFpropWithEigen( \ + *this, ctx, d, forget_bias, cell_clip, use_peephole, x, cs_prev, \ + h_prev, w, wci, wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, h); \ + } \ + template <> \ + void LSTMBlockCellBprop::operator()( \ + OpKernelContext* ctx, const CPUDevice& d, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::ConstMatrix i, \ + typename TTypes::ConstMatrix cs, typename TTypes::ConstMatrix f, \ + typename TTypes::ConstMatrix o, typename TTypes::ConstMatrix ci, \ + typename TTypes::ConstMatrix co, \ + typename TTypes::ConstMatrix cs_grad, \ + typename TTypes::ConstMatrix h_grad, typename TTypes::Matrix do_, \ + typename TTypes::Matrix dcs, typename TTypes::Matrix dci, \ + typename TTypes::Matrix df, typename TTypes::Matrix di, \ + typename TTypes::Matrix dicfo, \ + typename TTypes::Matrix cs_prev_grad, \ + typename TTypes::Vec wci_grad, typename TTypes::Vec wcf_grad, \ + typename TTypes::Vec wco_grad) { \ + 
LSTMBlockCellBpropWithEigen( \ + *this, ctx, d, use_peephole, x, cs_prev, h_prev, w, wci, wcf, wco, b, \ + i, cs, f, o, ci, co, cs_grad, h_grad, do_, dcs, dci, df, di, dicfo, \ + cs_prev_grad, wci_grad, wcf_grad, wco_grad); \ + } \ + template struct LSTMBlockCellFprop; \ template struct LSTMBlockCellBprop; DEFINE_CPU_SPECS(float); @@ -377,24 +378,26 @@ REGISTER_KERNEL(float); #if GOOGLE_CUDA namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ - \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ + \ extern template struct LSTMBlockCellFprop; DECLARE_GPU_SPEC(float); 
+DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -405,6 +408,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -629,9 +633,9 @@ class LSTMBlockCellGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, wci_grad_tensor->flat()); - functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); functor::LSTMBlockCellBprop(batch_size, input_size, cell_size)( @@ -688,6 +692,7 @@ namespace functor { true /* USE_CUBLAS */>; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // namespace functor @@ -698,6 +703,7 @@ DECLARE_GPU_SPEC(float); LSTMBlockCellGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -984,10 +990,10 @@ class BlockLSTMOp : public OpKernel { Tensor cs_tensor = cs_out->Slice(seq_len_max, timelen); Tensor h_tensor = h_out->Slice(seq_len_max, timelen); - functor::TensorUnalignedZero()( - device, cs_tensor.unaligned_flat()); - functor::TensorUnalignedZero()( - device, h_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + cs_tensor.unaligned_flat()); + functor::TensorUnalignedZero()(device, + h_tensor.unaligned_flat()); } } @@ -1021,6 +1027,7 @@ namespace functor { extern template struct TensorUnalignedZero; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1033,6 +1040,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMOp); 
REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA @@ -1195,16 +1203,15 @@ class BlockLSTMGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - functor::TensorZero()(device, cs_grad_tensor.flat()); - functor::TensorZero()(device, - cs_prev_grad_tensor->flat()); - functor::TensorZero()(device, h_grad_tensor.flat()); - functor::TensorZero()(device, h_prev_grad_tensor->flat()); - functor::TensorZero()(device, w_grad_tensor->flat()); - functor::TensorZero()(device, wci_grad_tensor->flat()); - functor::TensorZero()(device, wcf_grad_tensor->flat()); - functor::TensorZero()(device, wco_grad_tensor->flat()); - functor::TensorZero()(device, b_grad_tensor->flat()); + functor::TensorZero()(device, cs_grad_tensor.flat()); + functor::TensorZero()(device, cs_prev_grad_tensor->flat()); + functor::TensorZero()(device, h_grad_tensor.flat()); + functor::TensorZero()(device, h_prev_grad_tensor->flat()); + functor::TensorZero()(device, w_grad_tensor->flat()); + functor::TensorZero()(device, wci_grad_tensor->flat()); + functor::TensorZero()(device, wcf_grad_tensor->flat()); + functor::TensorZero()(device, wco_grad_tensor->flat()); + functor::TensorZero()(device, b_grad_tensor->flat()); const int64 seq_len_max = seq_len_max_tensor->scalar()(); SliceHelper slicer(ctx); @@ -1331,6 +1338,7 @@ namespace functor { extern template struct BlockLSTMBprop; DECLARE_GPU_SPEC(float); +DECLARE_GPU_SPEC(Eigen::half); // DECLARE_GPU_SPEC(double); #undef DECLARE_GPU_SPEC } // end namespace functor @@ -1343,6 +1351,7 @@ DECLARE_GPU_SPEC(float); BlockLSTMGradOp); REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(Eigen::half); // REGISTER_GPU_KERNEL(double); #undef REGISTER_GPU_KERNEL #endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h index d23cedc234..5ca1dad655 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.h +++ 
b/tensorflow/contrib/rnn/kernels/lstm_ops.h @@ -77,8 +77,7 @@ template struct TensorZeroPadding { void operator()(const Device& d, const int64 time_idx, typename TTypes::ConstVec seq_len, - typename TTypes::Vec mask, - typename TTypes::Matrix m) { + typename TTypes::Vec mask, typename TTypes::Matrix m) { // mask is shape [batch_size]. mask.device(d) = seq_len.constant(time_idx) < seq_len; @@ -154,18 +153,21 @@ struct LSTMBlockCellFprop : public LSTMBlockCell { const int cell_size) : LSTMBlockCell(batch_size, input_size, cell_size) {} - void operator()( - OpKernelContext* ctx, const Device& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, - typename TTypes::ConstMatrix cs_prev, - typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, - typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, - typename TTypes::ConstVec wco, typename TTypes::ConstVec b, - typename TTypes::Matrix xh, typename TTypes::Matrix i, - typename TTypes::Matrix cs, typename TTypes::Matrix f, - typename TTypes::Matrix o, typename TTypes::Matrix ci, - typename TTypes::Matrix co, typename TTypes::Matrix icfo, - typename TTypes::Matrix h); + void operator()(OpKernelContext* ctx, const Device& d, + const float forget_bias, const float cell_clip, + bool use_peephole, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, + typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, + typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, + typename TTypes::ConstVec b, typename TTypes::Matrix xh, + typename TTypes::Matrix i, typename TTypes::Matrix cs, + typename TTypes::Matrix f, typename TTypes::Matrix o, + typename TTypes::Matrix ci, typename TTypes::Matrix co, + typename TTypes::Matrix icfo, + typename TTypes::Matrix h); }; // See lstm_ops.cc for CPUDevice implementation and lstm_ops_gpu.cu.cc for @@ -261,7 +263,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { 
typename TTypes::ConstMatrix const_dicfo(dicfo.data(), dicfo.dimensions()); TensorBlasGemm::compute( - ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad); + ctx, d, false, true, 1.f, const_dicfo, w, 0.f, xh_grad); // xh. xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x; @@ -274,7 +276,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { // w_grad. TensorBlasGemm::compute( - ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad); + ctx, d, true, false, 1.f, const_xh, const_dicfo, 1.f, w_grad); // b_grad. b_grad.device(d) += dicfo.sum(Eigen::array({0})); diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc index 6d3758fef1..b664b0f45e 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops_gpu.cu.cc @@ -31,6 +31,49 @@ typedef Eigen::GpuDevice GPUDevice; namespace { +struct FloatToHalf { + __host__ __device__ EIGEN_STRONG_INLINE Eigen::half operator()( + const float& x) const { + return Eigen::half_impl::float_to_half_rtne(x); + } +}; + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t); + +template +__host__ __device__ EIGEN_STRONG_INLINE + typename std::enable_if::value, U>::type + strict_cast(T t) { + return t; +} + +template <> +__host__ __device__ EIGEN_STRONG_INLINE Eigen::half +strict_cast(float t) { + return FloatToHalf()(t); +} + +} // namespace + +template +struct TensorZero { + void operator()(const GPUDevice& d, typename TTypes::Flat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +template +struct TensorUnalignedZero { + void operator()(const GPUDevice& d, typename TTypes::UnalignedFlat t) { + t.device(d) = t.constant(strict_cast(0.f)); + } +}; + +namespace { + // Adds bias, applies non-linearities and gates. 
// // Launch with a 2D setup such that there is one thread per (example, @@ -42,12 +85,15 @@ namespace { template __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, const T* wci, const T* wcf, const T* wco, T* o, T* h, - T* ci, T* cs, T* co, T* i, T* f, const T forget_bias, - const T cell_clip, const int batch_size, - const int cell_size) { + T* ci, T* cs, T* co, T* i, T* f, + const float forget_bias, const float cell_clip, + const int batch_size, const int cell_size) { const int batch_id = blockIdx.x * blockDim.x + threadIdx.x; const int act_id = blockIdx.y * blockDim.y + threadIdx.y; + T forget_bias_t = strict_cast(forget_bias); + T cell_clip_t = strict_cast(cell_clip); + if (batch_id >= batch_size || act_id >= cell_size) return; // The following code assumes the input arrays are of the following @@ -115,16 +161,16 @@ __global__ void lstm_gates(const T* icfo, const T* b, const T* cs_prev, T f_local; if (use_peephole) { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias + cs_prev[cid] * wcf[act_id]); + forget_bias_t + cs_prev[cid] * wcf[act_id]); } else { f_local = sigmoid_op(icfo[2 * cell_size + gid] + b[2 * cell_size + act_id] + - forget_bias); + forget_bias_t); } f[cid] = f_local; T cs_local = i_local * ci_local + f_local * cs_prev[cid]; - if (cell_clip > 0.0) { - cs_local = clip_op(cs_local, cell_clip); + if (cell_clip_t > strict_cast(0.0f)) { + cs_local = clip_op(cs_local, cell_clip_t); } cs[cid] = cs_local; @@ -174,8 +220,8 @@ __global__ void concat_xh(T* xh, const T* x, const T* h_prev, template void LSTMBlockCellFpropWithCUDA( - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, + const float cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename 
TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -202,7 +248,7 @@ void LSTMBlockCellFpropWithCUDA( // states1 = xh * w typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); TensorBlasGemm::compute( - ctx, d, false, false, T(1), const_xh, w, T(0), icfo); + ctx, d, false, false, 1.f, const_xh, w, 0.f, icfo); // Add bias, apply non-linearities and gating. // @@ -357,8 +403,9 @@ void LSTMBlockCellBpropWithCUDA( template struct TensorAdd; \ template <> \ void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ - const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ + OpKernelContext* ctx, const GPUDevice& d, const float forget_bias, \ + const float cell_clip, bool use_peephole, \ + typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix cs_prev, \ typename TTypes::ConstMatrix h_prev, \ typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ @@ -368,10 +415,10 @@ void LSTMBlockCellBpropWithCUDA( typename TTypes::Matrix f, typename TTypes::Matrix o, \ typename TTypes::Matrix ci, typename TTypes::Matrix co, \ typename TTypes::Matrix icfo, typename TTypes::Matrix h) { \ - LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, use_peephole, \ - x, cs_prev, h_prev, w, wci, wcf, wco, b, xh, i, \ - cs, f, o, ci, co, icfo, h, batch_size_, \ - cell_size_, input_size_); \ + LSTMBlockCellFpropWithCUDA(ctx, d, forget_bias, cell_clip, \ + use_peephole, x, cs_prev, h_prev, w, wci, \ + wcf, wco, b, xh, i, cs, f, o, ci, co, icfo, \ + h, batch_size_, cell_size_, input_size_); \ } \ template <> \ void LSTMBlockCellBprop::operator()( \ @@ -403,6 +450,7 @@ void LSTMBlockCellBpropWithCUDA( template struct BlockLSTMBprop; DEFINE_GPU_SPECS(float); +DEFINE_GPU_SPECS(Eigen::half); // DEFINE_GPU_SPECS(double); #undef DEFINE_GPU_SPECS diff --git a/tensorflow/contrib/rnn/ops/lstm_ops.cc b/tensorflow/contrib/rnn/ops/lstm_ops.cc index 699cc6c88a..1679e35518 100644 
--- a/tensorflow/contrib/rnn/ops/lstm_ops.cc +++ b/tensorflow/contrib/rnn/ops/lstm_ops.cc @@ -41,7 +41,7 @@ REGISTER_OP("LSTMBlockCell") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -128,7 +128,7 @@ REGISTER_OP("LSTMBlockCellGrad") .Output("wcf_grad: T") .Output("wco_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev; TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &x)); @@ -196,7 +196,7 @@ REGISTER_OP("BlockLSTM") .Attr("forget_bias: float = 1.0") .Attr("cell_clip: float = 3.0") .Attr("use_peephole: bool = false") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); @@ -288,7 +288,7 @@ REGISTER_OP("BlockLSTMGrad") .Output("wco_grad: T") .Output("b_grad: T") .Attr("use_peephole: bool") - .Attr("T: {float}") + .Attr("T: {half, float}") .SetShapeFn([](InferenceContext* c) { ShapeHandle x, cs_prev, h_prev, w, wci, wco, wcf, b; TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 3, &x)); diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 9e61fc54d1..f645165efe 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -596,6 +596,7 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): cell_clip=None, use_peephole=False, reuse=None, + dtype=None, name="lstm_fused_cell"): """Initialize the LSTM cell. @@ -607,12 +608,14 @@ class LSTMBlockFusedCell(LSTMBlockWrapper): reuse: (optional) boolean describing whether to reuse variables in an existing scope. If not `True`, and the existing scope already has the given variables, an error is raised. 
+ dtype: the dtype of variables of this layer. name: String, the name of the layer. Layers with the same name will share weights, but to avoid mistakes we require reuse=True in such cases. By default this is "lstm_cell", for variable-name compatibility with `tf.nn.rnn_cell.LSTMCell`. """ - super(LSTMBlockFusedCell, self).__init__(_reuse=reuse, name=name) + super(LSTMBlockFusedCell, self).__init__( + _reuse=reuse, name=name, dtype=dtype) self._num_units = num_units self._forget_bias = forget_bias self._cell_clip = cell_clip if cell_clip is not None else -1 -- GitLab From 40dd7b0096f3e344444766169617a57ce410fd17 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 14:06:44 -0700 Subject: [PATCH 0108/1078] Upgrade setuptools before installing absl-py. (#23266) PiperOrigin-RevId: 218471042 --- .../tools/ci_build/install/install_pip_packages.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_pip_packages.sh b/tensorflow/tools/ci_build/install/install_pip_packages.sh index 7f293e8604..2c142041f3 100755 --- a/tensorflow/tools/ci_build/install/install_pip_packages.sh +++ b/tensorflow/tools/ci_build/install/install_pip_packages.sh @@ -29,6 +29,11 @@ easy_install3 -U pip==9.0.3 pip2 install wheel==0.31.1 pip3 install wheel==0.31.1 +# Install last working version of setuptools. This must happen before we install +# absl-py, which uses install_requires notation introduced in setuptools 20.5. +pip2 install --upgrade setuptools==39.1.0 +pip3 install --upgrade setuptools==39.1.0 + pip2 install virtualenv pip3 install virtualenv @@ -112,10 +117,6 @@ pip3 install --upgrade gast pip2 install --upgrade termcolor pip3 install --upgrade termcolor -# Install last working version of setuptools. 
-pip2 install --upgrade setuptools==39.1.0 -pip3 install --upgrade setuptools==39.1.0 - # Keras pip2 install keras_applications==1.0.6 --no-deps pip3 install keras_applications==1.0.6 --no-deps @@ -123,7 +124,3 @@ pip2 install keras_preprocessing==1.0.5 --no-deps pip3 install keras_preprocessing==1.0.5 --no-deps pip2 install --upgrade h5py==2.8.0 pip3 install --upgrade h5py==2.8.0 - -# Install last working version of setuptools. -pip2 install --upgrade setuptools==39.1.0 -pip3 install --upgrade setuptools==39.1.0 -- GitLab From b58290fc603760724dc4fb55585ad81094204f56 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 18:11:56 -0700 Subject: [PATCH 0109/1078] Allow empty GCS tokens to be cached. (#23275) PiperOrigin-RevId: 217159671 --- tensorflow/core/platform/cloud/google_auth_provider.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/platform/cloud/google_auth_provider.cc b/tensorflow/core/platform/cloud/google_auth_provider.cc index 6ffe51e897..e15400780a 100644 --- a/tensorflow/core/platform/cloud/google_auth_provider.cc +++ b/tensorflow/core/platform/cloud/google_auth_provider.cc @@ -135,8 +135,7 @@ Status GoogleAuthProvider::GetToken(string* t) { mutex_lock lock(mu_); const uint64 now_sec = env_->NowSeconds(); - if (!current_token_.empty() && - now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) { + if (now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) { *t = current_token_; return Status::OK(); } -- GitLab From f90c2141ce5417e26bbf3dbcae426a8987cb60f1 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 18:36:15 -0700 Subject: [PATCH 0110/1078] Upgrade setuptools before clean pip install pulls in absl-py. 
(#23276) absl-py recently added a version dependency to the package, causing install to fail on the old setuptools PiperOrigin-RevId: 218783878 --- tensorflow/tools/ci_build/builds/pip.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index 6543779022..d1fad98ed7 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -321,6 +321,12 @@ create_activate_virtualenv_and_install_tensorflow() { # some versions in python curl https://bootstrap.pypa.io/get-pip.py | python + # Force upgrade of setuptools. This must happen before the pip install of the + # WHL_PATH, which pulls in absl-py, which uses install_requires notation + # introduced in setuptools >=20.5. The default version of setuptools is 5.5.1, + # which is too old for absl-py. + pip install --upgrade setuptools==39.1.0 + # Force tensorflow reinstallation. Otherwise it may not get installed from # last build if it had the same version number as previous build. PIP_FLAGS="--upgrade --force-reinstall" @@ -328,9 +334,11 @@ create_activate_virtualenv_and_install_tensorflow() { die "pip install (forcing to reinstall tensorflow) FAILED" echo "Successfully installed pip package ${TF_WHEEL_PATH}" - # Force downgrade setuptools. + # Force downgrade of setuptools. This must happen after the pip install of the + # WHL_PATH, which ends up upgrading to the latest version of setuptools. 
+ # Versions of setuptools >= 39.1.0 will cause tests to fail like this: + # ImportError: cannot import name py31compat pip install --upgrade setuptools==39.1.0 - } ################################################################################ -- GitLab From 684ce69f568e89e34883d91b65c3983c333e53d9 Mon Sep 17 00:00:00 2001 From: Karl Lessard Date: Thu, 25 Oct 2018 22:18:22 -0400 Subject: [PATCH 0111/1078] Prevent memory leak by storing strings instead of StringPiece in vector --- tensorflow/core/common_runtime/eager/attr_builder.cc | 2 +- tensorflow/core/common_runtime/eager/attr_builder.h | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/eager/attr_builder.cc b/tensorflow/core/common_runtime/eager/attr_builder.cc index 5c8369de87..e9f2188656 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.cc +++ b/tensorflow/core/common_runtime/eager/attr_builder.cc @@ -99,7 +99,7 @@ Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { #define DEFINE_SET_ATTR(value_type, value_field) \ template <> \ AttrBuilder& AttrBuilder::Set(StringPiece attr_name, value_type&& value) { \ - value_field.push_back(std::make_pair(attr_name, value)); \ + value_field.push_back(std::make_pair(string(attr_name), value)); \ return *this; \ } diff --git a/tensorflow/core/common_runtime/eager/attr_builder.h b/tensorflow/core/common_runtime/eager/attr_builder.h index c114ea4ba0..a48ba686d2 100644 --- a/tensorflow/core/common_runtime/eager/attr_builder.h +++ b/tensorflow/core/common_runtime/eager/attr_builder.h @@ -96,7 +96,7 @@ class AttrBuilder { template AttrBuilder& Set(StringPiece attr_name, T&& value) { MayBeInitializeNodeDef(); - SetInAttrValueMap(node_def_->mutable_attr(), attr_name, value); + SetInAttrValueMap(node_def_->mutable_attr(), string(attr_name), value); return *this; } @@ -107,7 +107,7 @@ class AttrBuilder { private: template - using AttrVec = tensorflow::gtl::InlinedVector, 2>; + using AttrVec 
= tensorflow::gtl::InlinedVector, 2>; void MayBeInitializeNodeDef(); // Fill `m` with the attr-value pairs set via AttrBuilder::Set() so far, as @@ -119,7 +119,7 @@ class AttrBuilder { void FillAttrValueMap(AttrValueMap* m, bool include_those_in_node_def) const; template - void SetInAttrValueMap(AttrValueMap* m, StringPiece attr_name, + void SetInAttrValueMap(AttrValueMap* m, const string& attr_name, T&& value) const { DCHECK(!node_def_finalized_) << "Calling SetInAttrValueMap after BuildNodeDef."; @@ -128,12 +128,12 @@ class AttrBuilder { AttrValue attr_value; if (found == nullptr) { SetAttrValue(value, &attr_value); - m->insert(AttrValueMap::value_type(string(attr_name), attr_value)); + m->insert(AttrValueMap::value_type(attr_name, attr_value)); } else { // TODO(ashankar): Do what is done in // NodeDefBuilder::CheckInconsistency(attr_name, *found, attr_value); SetAttrValue(std::forward(value), &attr_value); - (*m)[string(attr_name)] = attr_value; + (*m)[attr_name] = attr_value; } } -- GitLab From 748435b8ef55a554e011e97a9f893304e737775a Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 25 Oct 2018 20:26:00 -0700 Subject: [PATCH 0112/1078] Fixed the issue that each invocation of model.fit/evaluate/predict modifies the (#23280) graph. PiperOrigin-RevId: 218793646 --- .../contrib/tpu/python/tpu/keras_support.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py index d628258b9d..a8eb3aa892 100644 --- a/tensorflow/contrib/tpu/python/tpu/keras_support.py +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -97,14 +97,25 @@ from tensorflow.python.platform import tf_logging as logging # TODO(b/114775106): temporary shim to optionally initialize the TPU # This increases the odds our session is initialized, but shouldn't be needed. 
+_TEST_REWRITE_OP = None + + def _maybe_initialize_tpu(session): """Initialize the TPU if it has not already been initialized.""" + global _TEST_REWRITE_OP try: + # Try to use cached version to avoid another ground of graph optimization. + test_rewrite_op = _TEST_REWRITE_OP + if (test_rewrite_op is None or + test_rewrite_op[0].graph != ops.get_default_graph()): + + def test_op(): + return constant_op.constant(1) + constant_op.constant(1) - def test_op(): - return constant_op.constant(1) + constant_op.constant(1) + test_rewrite_op = tpu.rewrite(test_op) + _TEST_REWRITE_OP = test_rewrite_op - session.run(tpu.rewrite(test_op)) + session.run(test_rewrite_op) except errors.FailedPreconditionError as _: session.run(tpu.initialize_system()) -- GitLab From 901912bdaa9668a4e4fa0e90b873b0f9d2d717f6 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 12:45:57 +0800 Subject: [PATCH 0113/1078] Replace "=>" with "into" for readability. Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index e041ab14ca..374c74c903 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -515,7 +515,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D // (NHWC) + Transpose (NHWC-> - // NCHW) " => "Conv2D (NCHW). Such patterns occur frequently in Keras. + // NCHW)" into "Conv2D (NCHW)". Such patterns occur frequently in Keras. // Note: we use the term "merge" is to combine (exactly) 2 nodes into one, // while "fusion" is // for 3+ nodes situation. 
-- GitLab From 906a3527b9f4be3446a1f9e49332555bb3dd93cc Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 12:46:26 +0800 Subject: [PATCH 0114/1078] Fix a grammatically incorrect in comments. Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 374c74c903..be91763a99 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -516,7 +516,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D // (NHWC) + Transpose (NHWC-> // NCHW)" into "Conv2D (NCHW)". Such patterns occur frequently in Keras. - // Note: we use the term "merge" is to combine (exactly) 2 nodes into one, + // Note: we use the term "merge" to combine (exactly) 2 nodes into one, // while "fusion" is // for 3+ nodes situation. // -- GitLab From b7ecda56d59248845103399a2eae73348995d63b Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 12:59:01 +0800 Subject: [PATCH 0115/1078] Merge the line with its previous line. --- tensorflow/core/graph/mkl_layout_pass.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index be91763a99..b0ae480d3d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -517,8 +517,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // (NHWC) + Transpose (NHWC-> // NCHW)" into "Conv2D (NCHW)". Such patterns occur frequently in Keras. // Note: we use the term "merge" to combine (exactly) 2 nodes into one, - // while "fusion" is - // for 3+ nodes situation. + // while "fusion" is for 3+ nodes situation. 
// // Transpose + Conv2d + Transpose: -- GitLab From 932a04eed44a7c02181245210c72decb8d12e963 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:00:30 +0800 Subject: [PATCH 0116/1078] 2+ should be 3+. --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index b0ae480d3d..2891979be2 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -592,7 +592,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::function get_node_to_be_merged; } MergeInfo; - // structure to specify information used in node fusion of 2+ operators + // structure to specify information used in node fusion of 3+ operators typedef struct { std::string pattern_name; // name to describe this pattern, such as // "Transpose_Mklop_Transpose". -- GitLab From f60f477407fb3e203146abdb6ec7fce6b9735d50 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:03:11 +0800 Subject: [PATCH 0117/1078] Capitalized the beginning of the sentences. --- tensorflow/core/graph/mkl_layout_pass.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 2891979be2..77715b1515 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -592,12 +592,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { std::function get_node_to_be_merged; } MergeInfo; - // structure to specify information used in node fusion of 3+ operators + // Structure to specify information used in node fusion of 3+ operators typedef struct { - std::string pattern_name; // name to describe this pattern, such as + std::string pattern_name; // Name to describe this pattern, such as // "Transpose_Mklop_Transpose". 
std::vector > - node_checkers; // extra restriction checker for these ops + node_checkers; // Extra restriction checker for these ops std::function*, std::vector&, std::function)> @@ -606,7 +606,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { } FusionInfo; // - // dimension indices for 2D tensor. + // Dimension indices for 2D tensor. // struct NCHW { enum dim { N = 0, C = 1, H = 2, W = 3 }; -- GitLab From a3e878dfb91d83d788d1901c863660f1fa51c951 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:05:07 +0800 Subject: [PATCH 0118/1078] Remove empty comment lines. --- tensorflow/core/graph/mkl_layout_pass.cc | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 77715b1515..c04887c36d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -903,37 +903,28 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string data_format); static bool CheckForTranspose(const Node* node, std::vector perm) { - // // Check node node, to see if it's "Transpose" - // if (node->type_string() != "Transpose") return false; - // // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. - // for (const Edge* e : node->out_edges()) { if (e->IsControlEdge()) { return false; } } - // // If "Transpose" has input control edges, don't fuse on it. - // for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) { return false; } } - // // If "Transpose" has multiple output data edges, also don't fuse it. - // if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; // Check "perm" attribute, make sure it's what we want. 
- // for (const Edge* e : node->in_edges()) { if (!e->IsControlEdge()) { const Node* perm_node = e->src(); @@ -948,11 +939,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { DataType type; GetNodeAttr(perm_node->def(), "dtype", &type); - // // Here we directly access to the "tensor_context", rather than // "int_val". This is because we find "int_val" is // not set properly under some circumstances. - // if (type == DT_INT32) { const int type_size = 4; const int* tensor_content = -- GitLab From 299c1649d1d48bc8d1e6a7fad505c20f2baaae8b Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 13:12:54 +0800 Subject: [PATCH 0119/1078] Fix a grammatically incorrect comment line. Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index c04887c36d..ce364c062d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -903,7 +903,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string data_format); static bool CheckForTranspose(const Node* node, std::vector perm) { - // Check node node, to see if it's "Transpose" + // Check if node's type is "Transpose" if (node->type_string() != "Transpose") return false; // Check if has out control edge. If true, this is a training graph. -- GitLab From 43ccbbf8b2d2ac4d46f76ee72d000120512358d3 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 13:21:03 +0800 Subject: [PATCH 0120/1078] Use "CHECK_EQ" instead of "CHECK". 
Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index ce364c062d..78b1353997 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -934,7 +934,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { e->dst_input() == kPermTensorIndex) { // we find the "perm" node, now try to retrieve its value. const TensorProto* proto = nullptr; - CHECK_EQ(GetNodeAttr(perm_node->def(), "value", &proto).ok(), true); + CHECK(GetNodeAttr(perm_node->def(), "value", &proto).ok()); DataType type; GetNodeAttr(perm_node->def(), "dtype", &type); -- GitLab From cd31f0bcd6b52b8d9bd5d8430bddd77388a4fff0 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:27:47 +0800 Subject: [PATCH 0121/1078] Explain the high level idea of comparing 'perm' and 'perm_node'. --- tensorflow/core/graph/mkl_layout_pass.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 78b1353997..21ceb66fca 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -924,7 +924,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // If "Transpose" has multiple output data edges, also don't fuse it. if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; - // Check "perm" attribute, make sure it's what we want. + // We compared the tensor containing the permutation order ("perm_node") + // with our desired order ("perm"). If they're exactly match, this check + // succeed and returns true. 
for (const Edge* e : node->in_edges()) { if (!e->IsControlEdge()) { const Node* perm_node = e->src(); @@ -934,7 +936,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { e->dst_input() == kPermTensorIndex) { // we find the "perm" node, now try to retrieve its value. const TensorProto* proto = nullptr; - CHECK(GetNodeAttr(perm_node->def(), "value", &proto).ok()); + CHECK_EQ(GetNodeAttr(perm_node->def(), "value", &proto).ok(), true); DataType type; GetNodeAttr(perm_node->def(), "dtype", &type); -- GitLab From d667d30ff7132db51537c38984c97a589d9e7612 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 13:46:25 +0800 Subject: [PATCH 0122/1078] Fix a typo. Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 21ceb66fca..bce9995371 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -941,7 +941,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { DataType type; GetNodeAttr(perm_node->def(), "dtype", &type); - // Here we directly access to the "tensor_context", rather than + // Here we directly access to the "tensor_content", rather than // "int_val". This is because we find "int_val" is // not set properly under some circumstances. if (type == DT_INT32) { -- GitLab From a11786c809cd013fd4d4f97c94ce94a524b5bc17 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:52:05 +0800 Subject: [PATCH 0123/1078] Remove some empty lines. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index bce9995371..76ae4faa45 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -955,7 +955,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { tensor_content + tensor_content_size); return perm_value == perm; - } else if (type == DT_INT64) { const int type_size = 8; const long* tensor_content = @@ -969,12 +968,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return perm_value == long_perm; } - return false; } } } - return false; } -- GitLab From 4f0c4445c9e09b4360fbce8ce8f1b50722dced56 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 13:56:30 +0800 Subject: [PATCH 0124/1078] Check if node is nullptr. --- tensorflow/core/graph/mkl_layout_pass.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 76ae4faa45..909f84123c 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -976,6 +976,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { } static bool CheckForMklOp(const Node* node, string name = "") { + if (node == nullptr) return false; + if (!name.empty() && node->type_string() != name) { return false; } -- GitLab From ffc909edb6b1c4376ed2870d6d65fd4b5f17460f Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 14:01:03 +0800 Subject: [PATCH 0125/1078] Add description for FuseTransposeMklOpTranspose(). 
--- tensorflow/core/graph/mkl_layout_pass.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 909f84123c..78d6962cc3 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -897,6 +897,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { Status FuseNode(std::unique_ptr* g, std::vector& nodes, const MklLayoutRewritePass::FusionInfo fi); + // Fuse tranpose(to "NHWC") + mklop("NHWC") + transpose(to "NCHW") into mklop("NCHW"). + // Here "mklop" can be any MKL-DNN supported op, such as Conv2D. static Status FuseTransposeMklOpTranspose( std::unique_ptr* g, std::vector& nodes, std::function copy_attrs, @@ -977,7 +979,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static bool CheckForMklOp(const Node* node, string name = "") { if (node == nullptr) return false; - + if (!name.empty() && node->type_string() != name) { return false; } -- GitLab From 738039381b33e1e39bf4089fa2c1341aeddb6308 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 14:03:00 +0800 Subject: [PATCH 0126/1078] Remove empty comment lines. --- tensorflow/core/graph/mkl_layout_pass.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 78d6962cc3..4cfe6aff1a 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1810,12 +1810,9 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, std::vector new_strides; std::vector new_dilations; if (strides.size() == 5) { - // // "strides" and "dilations" also need to be changed according to // "data_format", // in this case, is "NDHWC" to "NCDHW". 
- // - new_strides = {strides[NDHWC::dim::N], strides[NDHWC::dim::C], strides[NDHWC::dim::D], strides[NDHWC::dim::H], strides[NDHWC::dim::W]}; @@ -1827,11 +1824,9 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, nb->Attr("dilations", new_dilations); } else { - // // "strides" and "dilations" also need to be changed according to // "data_format", // in this case, is "NHWC" to "NCHW". - // new_strides = {strides[NHWC::dim::N], strides[NHWC::dim::C], strides[NHWC::dim::H], strides[NHWC::dim::W]}; -- GitLab From c4ab34f66163bc8face7f588ebba94cadd306d6f Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 14:06:57 +0800 Subject: [PATCH 0127/1078] Move nb->Attr() out of if statement. --- tensorflow/core/graph/mkl_layout_pass.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 4cfe6aff1a..850a6968a3 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1821,8 +1821,6 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, new_dilations = {dilations[NDHWC::dim::N], dilations[NDHWC::dim::C], dilations[NDHWC::dim::D], dilations[NDHWC::dim::H], dilations[NDHWC::dim::W]}; - nb->Attr("dilations", new_dilations); - } else { // "strides" and "dilations" also need to be changed according to // "data_format", @@ -1834,8 +1832,8 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, new_dilations = {dilations[NHWC::dim::N], dilations[NHWC::dim::C], dilations[NHWC::dim::H], dilations[NHWC::dim::W]}; - nb->Attr("dilations", new_dilations); } + nb->Attr("dilations", new_dilations); } } -- GitLab From aa14839dfd414adda0fb0260db10f24de18e1e1c Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Fri, 26 Oct 2018 14:17:04 +0800 Subject: [PATCH 0128/1078] Fix a variable name 
mis-spelling. Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 850a6968a3..8b29f29746 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2657,7 +2657,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( gtl::InlinedVector transpose_nchw_control_edges; gtl::InlinedVector, 4> transpose_nchw_in( transpose_nchw_num_inputs); - FillInputs(transpose_to_nhwc, &transpose_nchw_control_edges, + FillInputs(transpose_to_nchw, &transpose_nchw_control_edges, &transpose_nchw_in); // We will use the node name of Conv2d as the name of new node -- GitLab From 8afe630a977c94c331c31daaca1d52da6df11303 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 14:42:35 +0800 Subject: [PATCH 0129/1078] Fix a typo. --- tensorflow/core/graph/mkl_layout_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 8b29f29746..c6cb8552b7 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2657,7 +2657,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( gtl::InlinedVector transpose_nchw_control_edges; gtl::InlinedVector, 4> transpose_nchw_in( transpose_nchw_num_inputs); - FillInputs(transpose_to_nchw, &transpose_nchw_control_edges, + FillInputs(transpose_to_nhwc, &transpose_nchw_control_edges, &transpose_nchw_in); // We will use the node name of Conv2d as the name of new node @@ -2689,8 +2689,8 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( // Fill outputs. 
for (const Edge* e : transpose_to_nchw->out_edges()) { if (!e->IsControlEdge()) { - const int kConv2DWithBiasOutputSlot = 0; - CHECK_NOTNULL((*g)->AddEdge(new_node, kConv2DWithBiasOutputSlot, e->dst(), + const int kTransposeWithMklOpOutputSlot = 0; + CHECK_NOTNULL((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, e->dst(), e->dst_input())); } } -- GitLab From f05a5ac3a0a2418a7ab9e6b8a907b01b2335491a Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 14:58:53 +0800 Subject: [PATCH 0130/1078] Copy requested_device and assigned_device_name_index. --- tensorflow/core/graph/mkl_layout_pass.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index c6cb8552b7..125f7a15fb 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2698,6 +2698,10 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( // Copy device assigned to old node to new node. new_node->set_assigned_device_name(mklop->assigned_device_name()); + // Copy requested_device and assigned_device_name_index + new_node->set_requested_device(mklop->requested_device()); + new_node->set_assigned_device_name_index(mklop->assigned_device_name_index()); + (*g)->RemoveNode(transpose_to_nhwc); (*g)->RemoveNode(mklop); (*g)->RemoveNode(transpose_to_nchw); -- GitLab From 976a706932cfee6bcaa37970b07c064308524a2f Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 15:02:27 +0800 Subject: [PATCH 0131/1078] Remove an assert line. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 125f7a15fb..cacf563fff 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2722,7 +2722,6 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { const FusionInfo* fi_ptr = nullptr; for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { - assert(fi->ops.size() == fi->node_checkers.size()); nodes.clear(); fi_ptr = &*fi; // -- GitLab From 0309943937acb3f9b0b7df011f94c3d55cd5b66c Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 26 Oct 2018 15:15:08 +0800 Subject: [PATCH 0132/1078] Fix a typo. --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index cacf563fff..a7f86e56da 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2657,7 +2657,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( gtl::InlinedVector transpose_nchw_control_edges; gtl::InlinedVector, 4> transpose_nchw_in( transpose_nchw_num_inputs); - FillInputs(transpose_to_nhwc, &transpose_nchw_control_edges, + FillInputs(transpose_to_nchw, &transpose_nchw_control_edges, &transpose_nchw_in); // We will use the node name of Conv2d as the name of new node -- GitLab From 1d94242feccdb2afd3583c0332de090eab2f3811 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Sat, 27 Oct 2018 00:04:52 +0800 Subject: [PATCH 0133/1078] Replace the greedy search algorithm in "CheckForNodeFusion()" with a stack-based one, to avoid missing some patterns. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 78 +++++++++++++----------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index a7f86e56da..0ace4a1fd1 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -26,6 +26,8 @@ limitations under the License. #include #include #include +#include +#include #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/optimization_registry.h" @@ -2717,12 +2719,9 @@ Status MklLayoutRewritePass::FuseNode( std::tuple, const MklLayoutRewritePass::FusionInfo> MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { - bool found_pattern = false; - std::vector nodes; const FusionInfo* fi_ptr = nullptr; for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { - nodes.clear(); fi_ptr = &*fi; // // Make sure node "a" and its succeding nodes (b, c ...), match the pattern @@ -2730,51 +2729,60 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { // aka. "a->b->c" matches "op1->op2->op3" // - // Initialize "current_node" as node "a". - Node* current_node = a; - for (auto node_index = 0; node_index < fi->node_checkers.size(); - ++node_index) { - // Make sure current node meet the requirement of corresponding node - // checker. - auto check_node = fi->node_checkers[node_index]; - if (current_node == nullptr || - (check_node && check_node(current_node) == false)) { - found_pattern = false; - nodes.clear(); - break; - } + std::stack> work_stack; + std::set visited_nodes; + auto node_checker = fi->node_checkers.begin(); - // Add current_node to "fusion_nodes": - nodes.push_back(current_node); + Node *current_node = nullptr; + if (a != nullptr) { + work_stack.push(a); + } - // If current node is not the last node we want to check, check next node. 
- if (node_index != fi->node_checkers.size() - 1) { - // Find current node's direct descendant, which will be used in next - // iteration. - auto check_next_node = fi->node_checkers[node_index + 1]; - for (const Edge* e : current_node->out_edges()) { + while (!work_stack.empty()) { + current_node = work_stack.top(); + + if ((*node_checker)(current_node)){ + if (node_checker == (fi->node_checkers.end() - 1)) { + // We find a match, break and return. + std::vector nodes; + while (!work_stack.empty()) { + nodes.insert(nodes.begin(), work_stack.top()); + work_stack.pop(); + } + + return make_tuple(true, nodes, *fi_ptr); + } + + bool all_succ_has_been_visited = true; + for (const Edge *e : current_node->out_edges()) { if (!e->IsControlEdge()) { - Node* candidate_node = e->dst(); + Node *candidate_node = e->dst(); - if (check_next_node(candidate_node) == false) { - current_node = nullptr; - } else { - current_node = candidate_node; + // If the candidate node has not been visited, push it to stack. + if (visited_nodes.find(candidate_node) == visited_nodes.end()) { + work_stack.push(candidate_node); + ++ node_checker; + all_succ_has_been_visited = false; break; } + + // All successor nodes of current node has been visited (no match found), + // pop the stack and mark current node as "visited". + if (all_succ_has_been_visited) { + visited_nodes.insert(current_node); + work_stack.pop(); + -- node_checker; + } } } } else { - found_pattern = true; + // current node doesn't match, just break and stack will help us roll back. 
+ break; } } - - if (found_pattern == true) { - break; - } } - return make_tuple(found_pattern, nodes, *fi_ptr); + return make_tuple(false, std::vector(), *fi_ptr); } /////////////////////////////////////////////////////////////////////////////// -- GitLab From 59617ccaca8c5980f5418a0b612b040ac8d1afba Mon Sep 17 00:00:00 2001 From: Ouwen Huang Date: Tue, 30 Oct 2018 05:37:22 +0000 Subject: [PATCH 0134/1078] Added note on weight decay for tf.contrib.opt optimizers. --- .../python/training/weight_decay_optimizers.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py index 200b0d2008..1e8351b70f 100644 --- a/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py +++ b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py @@ -59,6 +59,23 @@ class DecoupledWeightDecayExtension(object): Note that this extension decays weights BEFORE applying the update based on the gradient, i.e. this extension only has the desired behaviour for optimizers which do not depend on the value of'var' in the update step! + + Note: when applying a decay to the learning rate, be sure to manually apply + the decay to the `weight_decay` as well. For example: + + ```python + decay = tf.train.piecewise_constant(tf.train.get_global_step(), + [10000, 15000], [1e-1, 1e-2, 1e-3]) + lr = 1*decay + wd = 1e-4*decay + + # ... + + optimizer = tf.contrib.opt.MomentumWOptimizer(learning_rate=lr, + weight_decay=wd, + momentum=0.9, + use_nesterov=True) + ``` """ def __init__(self, weight_decay, **kwargs): -- GitLab From da4235299bd7e0089108634074b659d353102969 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 10:54:36 +0800 Subject: [PATCH 0135/1078] Add a comment to the constraint of "inference-only" to note it will eventually be removed, if we enabled this fusion for training in the future. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 0ace4a1fd1..30874dcf9e 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -912,6 +912,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. + // Note: this constraint will eventually be removed, if we enabled this fusion for training + // in the future. for (const Edge* e : node->out_edges()) { if (e->IsControlEdge()) { return false; -- GitLab From aab29e70ec2de097a04fb36aa2a60e2d286be1de Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 11:26:48 +0800 Subject: [PATCH 0136/1078] Move a cheaper early return to the top of the function. --- tensorflow/core/graph/mkl_layout_pass.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 30874dcf9e..31d11d4aaf 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -910,6 +910,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Check if node's type is "Transpose" if (node->type_string() != "Transpose") return false; + // If "Transpose" has multiple output data edges, also don't fuse it. + if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; + // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. // Note: this constraint will eventually be removed, if we enabled this fusion for training @@ -927,9 +930,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass { } } - // If "Transpose" has multiple output data edges, also don't fuse it. 
- if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; - // We compared the tensor containing the permutation order ("perm_node") // with our desired order ("perm"). If they're exactly match, this check // succeed and returns true. -- GitLab From 6e3d7a22a7fa5622f32cee16a9162feab8f5a376 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 11:31:43 +0800 Subject: [PATCH 0137/1078] Set strides outside of if-else. --- tensorflow/core/graph/mkl_layout_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 31d11d4aaf..0df78ddc6f 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -1820,7 +1820,6 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, new_strides = {strides[NDHWC::dim::N], strides[NDHWC::dim::C], strides[NDHWC::dim::D], strides[NDHWC::dim::H], strides[NDHWC::dim::W]}; - nb->Attr("strides", new_strides); new_dilations = {dilations[NDHWC::dim::N], dilations[NDHWC::dim::C], dilations[NDHWC::dim::D], dilations[NDHWC::dim::H], @@ -1832,11 +1831,12 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, new_strides = {strides[NHWC::dim::N], strides[NHWC::dim::C], strides[NHWC::dim::H], strides[NHWC::dim::W]}; - nb->Attr("strides", new_strides); + new_dilations = {dilations[NHWC::dim::N], dilations[NHWC::dim::C], dilations[NHWC::dim::H], dilations[NHWC::dim::W]}; } + nb->Attr("strides", new_strides); nb->Attr("dilations", new_dilations); } } -- GitLab From 753c474ba8fbbb11d213fe650240b5f30d074058 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 11:38:29 +0800 Subject: [PATCH 0138/1078] Fix a comment line. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 0df78ddc6f..82e714fdf9 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2664,8 +2664,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( FillInputs(transpose_to_nchw, &transpose_nchw_control_edges, &transpose_nchw_in); - // We will use the node name of Conv2d as the name of new node - // Build new node. We use same name as original node, but change the op + // We use same name as original node, but change the op // name. NodeBuilder nb(mklop->name(), mklop->type_string()); -- GitLab From fa54ac8e616127862bdb2f9f0c3e9324274e360d Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 13:03:30 +0800 Subject: [PATCH 0139/1078] Add a comment to clarify that patterns in finfo_ shows up first will get applied first. --- tensorflow/core/graph/mkl_layout_pass.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 82e714fdf9..f726f01ce6 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -514,6 +514,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.conv2d_grad_filter_with_bias, GetConv2DBackpropFilterOrBiasAddGrad}); + // The fusion patterns in "finfo_" that show up first will get applied first, + // for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D to ABCD}, + // since the first gets applied first, the final graph will be ABC->D. 
+ // // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D // (NHWC) + Transpose (NHWC-> -- GitLab From a554e9d7f8dfcc562b568c678f93a88185d7dd05 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 13:12:04 +0800 Subject: [PATCH 0140/1078] Use std::unordered_set instead of std::set, for better performance. --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index f726f01ce6..305b80df34 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2735,7 +2735,7 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { // std::stack> work_stack; - std::set visited_nodes; + std::unordered_set visited_nodes; auto node_checker = fi->node_checkers.begin(); Node *current_node = nullptr; -- GitLab From 13ed0286c40c81f23a73a9cd773da4d1dd27197a Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 31 Oct 2018 13:35:28 +0800 Subject: [PATCH 0141/1078] Fix 2 bugs in CheckNodeForFusion(): 1. The else case for node_checker fails are not handled properly. Should pop the stack, rather then break the while loop entirely. 2. The nested level of stack op when node_check succeed is wrong. --- tensorflow/core/graph/mkl_layout_pass.cc | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 305b80df34..65bd568f6c 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2770,19 +2770,22 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { all_succ_has_been_visited = false; break; } - - // All successor nodes of current node has been visited (no match found), - // pop the stack and mark current node as "visited". 
- if (all_succ_has_been_visited) { - visited_nodes.insert(current_node); - work_stack.pop(); - -- node_checker; - } } } + + // All successor nodes of current node has been visited (no match found), + // pop the stack and mark current node as "visited". + if (all_succ_has_been_visited) { + visited_nodes.insert(current_node); + work_stack.pop(); + -- node_checker; + } + } else { - // current node doesn't match, just break and stack will help us roll back. - break; + // current node doesn't match, pop stack to roll back. + visited_nodes.insert(current_node); + work_stack.pop(); + -- node_checker; } } } -- GitLab From 0fb33d8f232eff875aac4379a2bd347fbd0ef8e1 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Thu, 1 Nov 2018 23:00:07 +0800 Subject: [PATCH 0142/1078] fix softmax Change-Id: Ic882c0c071c650400a3aadb9025b37381c762262 --- tensorflow/core/kernels/mkl_softmax_op.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index 92167e06d5..6ff27b1957 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -63,7 +63,13 @@ class MklSoftmaxOp : public OpKernel { : src_tensor.shape(); const int input_dims = src_tf_shape.dims(); auto src_dims = TFShapeToMklDnnDims(src_tf_shape); - auto output_dims = src_dims; + memory::dims output_dims; + if(src_mkl_shape.IsMklTensor()) { + output_dims = src_mkl_shape.GetSizesAsMklDnnDims(); + } + else { + output_dims = src_dims; //nhwc + } memory::format layout_type; // In MKL, data format passed to mkl softmax op depends on dimension of the input tensor. 
// Here "x" data format in MKL is used for 1 dim tensor, "nc" for 2 dim tensor, @@ -82,10 +88,10 @@ class MklSoftmaxOp : public OpKernel { layout_type = memory::format::tnc; break; case 4: - layout_type = memory::format::nchw; + layout_type = memory::format::nhwc; break; case 5: - layout_type = memory::format::ncdhw; + layout_type = memory::format::ndhwc; break; default: OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1")); -- GitLab From 4cdcadc62394e3f07520e0a04208a6916f178f42 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 1 Nov 2018 16:56:56 -0700 Subject: [PATCH 0143/1078] AsyncCheckpoints: Add missing 'self' arg to write_graph_fn. (#23439) PiperOrigin-RevId: 219365527 --- tensorflow/contrib/tpu/python/tpu/async_checkpoint.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py index 78253d83fc..c32bd5997c 100644 --- a/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py +++ b/tensorflow/contrib/tpu/python/tpu/async_checkpoint.py @@ -102,7 +102,8 @@ class AsyncCheckpointSaverHook(basic_session_run_hooks.CheckpointSaverHook): training_util.write_graph( ops.get_default_graph().as_graph_def(add_shapes=True), self._checkpoint_dir, "graph.pbtxt") - self._write_graph_thread = threading.Thread(target=_write_graph_fn) + self._write_graph_thread = threading.Thread(target=_write_graph_fn, + args=[self]) self._write_graph_thread.start() saver_def = self._get_saver().saver_def if self._get_saver() else None -- GitLab From 8ce231a8ebc73be5be53ccd90387fc68b187bcec Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 1 Nov 2018 18:12:13 -0700 Subject: [PATCH 0144/1078] Update version to 1.12.0 final (#23444) --- tensorflow/core/public/version.h | 2 +- tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h 
index 500ec8f97b..a55fe17dd5 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc2" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index ceaa96b690..036830dd22 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n') # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.12.0-rc2' +_VERSION = '1.12.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From a6d8ffae097d0132989ae4688d224121ec6d8f35 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Thu, 1 Nov 2018 18:35:10 -0700 Subject: [PATCH 0145/1078] Fix a bug in tpu.py and xla.py that while creating an identity node for control input edges under rewrite context, the parent control flow context is lost. (#23446) PiperOrigin-RevId: 219724472 --- tensorflow/contrib/compiler/xla.py | 13 +++++-------- tensorflow/contrib/tpu/python/tpu/tpu.py | 13 +++++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py index 873b03580d..83d9d8c54a 100644 --- a/tensorflow/contrib/compiler/xla.py +++ b/tensorflow/contrib/compiler/xla.py @@ -179,14 +179,11 @@ class XLACompileContext(control_flow_ops.XLAControlFlowContext): if external_control_inputs: # Use an identity to pull control inputs as data inputs. Note that we # ignore ops which don't have outputs. TODO(phawkins): fix that. 
- with ops.control_dependencies(None): - self.Enter() - external_control_inputs = [ - array_ops.identity(x.outputs[0]).op - for x in external_control_inputs - if x.outputs - ] - self.Exit() + external_control_inputs = [ + array_ops.identity(x.outputs[0]).op + for x in external_control_inputs + if x.outputs + ] # pylint: disable=protected-access op._add_control_inputs(external_control_inputs) # pylint: enable=protected-access diff --git a/tensorflow/contrib/tpu/python/tpu/tpu.py b/tensorflow/contrib/tpu/python/tpu/tpu.py index 11aaa1c66a..a5ccaa071b 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu.py @@ -371,14 +371,11 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext): if external_control_inputs: # Use an identity to pull control inputs as data inputs. Note that we # ignore ops which don't have outputs. TODO(phawkins): fix that. - with ops.control_dependencies(None): - self.Enter() - external_control_inputs = [ - array_ops.identity(x.outputs[0]).op - for x in external_control_inputs - if x.outputs - ] - self.Exit() + external_control_inputs = [ + array_ops.identity(x.outputs[0]).op + for x in external_control_inputs + if x.outputs + ] # pylint: disable=protected-access op._add_control_inputs(external_control_inputs) # pylint: enable=protected-access -- GitLab From c475ede7a02ff9a3e919ecbb9545be9377013bf1 Mon Sep 17 00:00:00 2001 From: George Sterpu Date: Fri, 2 Nov 2018 17:11:03 +0000 Subject: [PATCH 0146/1078] Update beam_search_decoder.py #22172 probably not the neatest way to update my previous pull request... 
--- tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index ab36848f13..8f8f057702 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -921,6 +921,7 @@ def _get_scores(log_probs, sequence_lengths, length_penalty_weight, """ length_penalty_ = _length_penalty( sequence_lengths=sequence_lengths, penalty_factor=length_penalty_weight) + length_penalty_ = math_ops.cast(length_penalty_, dtype=log_probs.dtype) scores = log_probs / length_penalty_ coverage_penalty_weight = ops.convert_to_tensor( -- GitLab From b0d15134f110ce380a0e769c4f415d41fbea2677 Mon Sep 17 00:00:00 2001 From: joaak <29533036+joaak@users.noreply.github.com> Date: Mon, 5 Nov 2018 15:51:37 -0500 Subject: [PATCH 0147/1078] update tokenizer code to remove bug --- .../image_captioning_with_attention.ipynb | 2301 +++++++++-------- 1 file changed, 1155 insertions(+), 1146 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb index 3acecd283c..09ea021c44 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb @@ -1,1184 +1,1193 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "K2s1A9eLRPEj" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\").\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": 
"text", + "id": "Cffg2i257iMS" + }, + "source": [ + "# Image Captioning with Attention\n", + "\n", + "
\n", + "\n", + " Run in Google Colab \n", + "\n", + "View source on GitHub
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "QASbY_HGo4Lq" + }, + "source": [ + "Image captioning is the task of generating a caption for an image. Given an image like this:\n", + "\n", + "![Man Surfing](https://tensorflow.org/images/surf.jpg) \n", + "\n", + "[Image Source](https://commons.wikimedia.org/wiki/Surfing#/media/File:Surfing_in_Hawaii.jpg), License: Public Domain\n", + "\n", + "Our goal is to generate a caption, such as \"a surfer riding on a wave\". Here, we'll use an attention-based model. This enables us to see which parts of the image the model focuses on as it generates a caption.\n", + "\n", + "![Prediction](https://tensorflow.org/images/imcap_prediction.png)\n", + "\n", + "This model architecture below is similar to [Show, Attend and Tell: Neural Image Caption Generation with Visual Attention](https://arxiv.org/abs/1502.03044). \n", + "\n", + "The code uses [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager), which you can learn more about in the linked guides.\n", + "\n", + "This notebook is an end-to-end example. If you run it, it will download the [MS-COCO](http://cocodataset.org/#home) dataset, preprocess and cache a subset of the images using Inception V3, train an encoder-decoder model, and use it to generate captions on new images.\n", + "\n", + "The code requires TensorFlow version >=1.9. If you're running this in [Colab]()\n", + "\n", + "In this example, we're training on a relatively small amount of data as an example. On a single P100 GPU, this example will take about ~2 hours to train. 
We train on the first 30,000 captions (corresponding to about ~20,000 images depending on shuffling, as there are multiple captions per image in the dataset)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "name": "image_captioning_with_attention.ipynb", - "version": "0.3.2", - "views": {}, - "default_view": {}, - "provenance": [ - { - "file_id": "1HI8OK2sMjcx9CTWVn0122QAHOuXaOaMg", - "timestamp": 1530222436922 - } - ], - "private_outputs": true, - "collapsed_sections": [], - "toc_visible": true + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "accelerator": "GPU" + "colab_type": "code", + "id": "U8l4RJ0XRPEm" + }, + "outputs": [], + "source": [ + "# Import TensorFlow and enable eager execution\n", + "# This code requires TensorFlow version >=1.9\n", + "import tensorflow as tf\n", + "tf.enable_eager_execution()\n", + "\n", + "# We'll generate plots of attention in order to see which parts of an image\n", + "# our model focuses on during captioning\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Scikit-learn includes many helpful utilities\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.utils import shuffle\n", + "\n", + "import re\n", + "import numpy as np\n", + "import os\n", + "import time\n", + "import json\n", + "from glob import glob\n", + "from PIL import Image\n", + "import pickle" + ] }, - "cells": [ - { - "metadata": { - "id": "K2s1A9eLRPEj", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\").\n" - ] - }, - { - "metadata": { - "id": "Cffg2i257iMS", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Image Captioning with Attention\n", - "\n", - "
\n", - "\n", - " Run in Google Colab \n", - "\n", - "View source on GitHub
" - ] - }, - { - "metadata": { - "id": "QASbY_HGo4Lq", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "Image captioning is the task of generating a caption for an image. Given an image like this:\n", - "\n", - "![Man Surfing](https://tensorflow.org/images/surf.jpg) \n", - "\n", - "[Image Source](https://commons.wikimedia.org/wiki/Surfing#/media/File:Surfing_in_Hawaii.jpg), License: Public Domain\n", - "\n", - "Our goal is to generate a caption, such as \"a surfer riding on a wave\". Here, we'll use an attention-based model. This enables us to see which parts of the image the model focuses on as it generates a caption.\n", - "\n", - "![Prediction](https://tensorflow.org/images/imcap_prediction.png)\n", - "\n", - "This model architecture below is similar to [Show, Attend and Tell: Neural Image Caption Generation with Visual Attention](https://arxiv.org/abs/1502.03044). \n", - "\n", - "The code uses [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager), which you can learn more about in the linked guides.\n", - "\n", - "This notebook is an end-to-end example. If you run it, it will download the [MS-COCO](http://cocodataset.org/#home) dataset, preprocess and cache a subset of the images using Inception V3, train an encoder-decoder model, and use it to generate captions on new images.\n", - "\n", - "The code requires TensorFlow version >=1.9. If you're running this in [Colab]()\n", - "\n", - "In this example, we're training on a relatively small amount of data as an example. On a single P100 GPU, this example will take about ~2 hours to train. 
We train on the first 30,000 captions (corresponding to about ~20,000 images depending on shuffling, as there are multiple captions per image in the dataset)\n" - ] - }, - { - "metadata": { - "id": "U8l4RJ0XRPEm", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# Import TensorFlow and enable eager execution\n", - "# This code requires TensorFlow version >=1.9\n", - "import tensorflow as tf\n", - "tf.enable_eager_execution()\n", - "\n", - "# We'll generate plots of attention in order to see which parts of an image\n", - "# our model focuses on during captioning\n", - "import matplotlib.pyplot as plt\n", - "\n", - "# Scikit-learn includes many helpful utilities\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.utils import shuffle\n", - "\n", - "import re\n", - "import numpy as np\n", - "import os\n", - "import time\n", - "import json\n", - "from glob import glob\n", - "from PIL import Image\n", - "import pickle" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "b6qbGw8MRPE5", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Download and prepare the MS-COCO dataset\n", - "\n", - "We will use the [MS-COCO dataset](http://cocodataset.org/#home) to train our model. This dataset contains >82,000 images, each of which has been annotated with at least 5 different captions. The code below will download and extract the dataset automatically. \n", - "\n", - "**Caution: large download ahead**. We'll use the training set, it's a 13GB file." 
- ] - }, - { - "metadata": { - "id": "krQuPYTtRPE7", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "annotation_zip = tf.keras.utils.get_file('captions.zip', \n", - " cache_subdir=os.path.abspath('.'),\n", - " origin = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',\n", - " extract = True)\n", - "annotation_file = os.path.dirname(annotation_zip)+'/annotations/captions_train2014.json'\n", - "\n", - "name_of_zip = 'train2014.zip'\n", - "if not os.path.exists(os.path.abspath('.') + '/' + name_of_zip):\n", - " image_zip = tf.keras.utils.get_file(name_of_zip, \n", - " cache_subdir=os.path.abspath('.'),\n", - " origin = 'http://images.cocodataset.org/zips/train2014.zip',\n", - " extract = True)\n", - " PATH = os.path.dirname(image_zip)+'/train2014/'\n", - "else:\n", - " PATH = os.path.abspath('.')+'/train2014/'" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "aANEzb5WwSzg", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Optionally, limit the size of the training set for faster training\n", - "For this example, we'll select a subset of 30,000 captions and use these and the corresponding images to train our model. As always, captioning quality will improve if you choose to use more data." 
- ] - }, - { - "metadata": { - "id": "4G3b8x8_RPFD", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# read the json file\n", - "with open(annotation_file, 'r') as f:\n", - " annotations = json.load(f)\n", - "\n", - "# storing the captions and the image name in vectors\n", - "all_captions = []\n", - "all_img_name_vector = []\n", - "\n", - "for annot in annotations['annotations']:\n", - " caption = ' ' + annot['caption'] + ' '\n", - " image_id = annot['image_id']\n", - " full_coco_image_path = PATH + 'COCO_train2014_' + '%012d.jpg' % (image_id)\n", - " \n", - " all_img_name_vector.append(full_coco_image_path)\n", - " all_captions.append(caption)\n", - "\n", - "# shuffling the captions and image_names together\n", - "# setting a random state\n", - "train_captions, img_name_vector = shuffle(all_captions,\n", - " all_img_name_vector,\n", - " random_state=1)\n", - "\n", - "# selecting the first 30000 captions from the shuffled set\n", - "num_examples = 30000\n", - "train_captions = train_captions[:num_examples]\n", - "img_name_vector = img_name_vector[:num_examples]" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "mPBMgK34RPFL", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "len(train_captions), len(all_captions)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "8cSW4u-ORPFQ", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Preprocess the images using InceptionV3\n", - "Next, we will use InceptionV3 (pretrained on Imagenet) to classify each image. We will extract features from the last convolutional layer. 
\n", - "\n", - "First, we will need to convert the images into the format inceptionV3 expects by:\n", - "* Resizing the image to (299, 299)\n", - "* Using the [preprocess_input](https://www.tensorflow.org/api_docs/python/tf/keras/applications/inception_v3/preprocess_input) method to place the pixels in the range of -1 to 1 (to match the format of the images used to train InceptionV3)." - ] - }, - { - "metadata": { - "id": "zXR0217aRPFR", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "def load_image(image_path):\n", - " img = tf.read_file(image_path)\n", - " img = tf.image.decode_jpeg(img, channels=3)\n", - " img = tf.image.resize_images(img, (299, 299))\n", - " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", - " return img, image_path" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "MDvIu4sXRPFV", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Initialize InceptionV3 and load the pretrained Imagenet weights\n", - "\n", - "To do so, we'll create a tf.keras model where the output layer is the last convolutional layer in the InceptionV3 architecture. \n", - "* Each image is forwarded through the network and the vector that we get at the end is stored in a dictionary (image_name --> feature_vector). \n", - "* We use the last convolutional layer because we are using attention in this example. The shape of the output of this layer is ```8x8x2048```. \n", - "* We avoid doing this during training so it does not become a bottleneck. \n", - "* After all the images are passed through the network, we pickle the dictionary and save it to disk." 
- ] - }, - { - "metadata": { - "id": "RD3vW4SsRPFW", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "image_model = tf.keras.applications.InceptionV3(include_top=False, \n", - " weights='imagenet')\n", - "new_input = image_model.input\n", - "hidden_layer = image_model.layers[-1].output\n", - "\n", - "image_features_extract_model = tf.keras.Model(new_input, hidden_layer)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "rERqlR3WRPGO", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Caching the features extracted from InceptionV3\n", - "\n", - "We will pre-process each image with InceptionV3 and cache the output to disk. Caching the output in RAM would be faster but memory intensive, requiring 8 \\* 8 \\* 2048 floats per image. At the time of writing, this would exceed the memory limitations of Colab (although these may change, an instance appears to have about 12GB of memory currently). \n", - "\n", - "Performance could be improved with a more sophisticated caching strategy (e.g., by sharding the images to reduce random access disk I/O) at the cost of more code.\n", - "\n", - "This will take about 10 minutes to run in Colab with a GPU. If you'd like to see a progress bar, you could: install [tqdm](https://github.com/tqdm/tqdm) (```!pip install tqdm```), then change this line: \n", - "\n", - "```for img, path in image_dataset:``` \n", - "\n", - "to:\n", - "\n", - "```for img, path in tqdm(image_dataset):```." 
- ] - }, - { - "metadata": { - "id": "Dx_fvbVgRPGQ", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# getting the unique images\n", - "encode_train = sorted(set(img_name_vector))\n", - "\n", - "# feel free to change the batch_size according to your system configuration\n", - "image_dataset = tf.data.Dataset.from_tensor_slices(\n", - " encode_train).map(load_image).batch(16)\n", - "\n", - "for img, path in image_dataset:\n", - " batch_features = image_features_extract_model(img)\n", - " batch_features = tf.reshape(batch_features, \n", - " (batch_features.shape[0], -1, batch_features.shape[3]))\n", - "\n", - " for bf, p in zip(batch_features, path):\n", - " path_of_feature = p.numpy().decode(\"utf-8\")\n", - " np.save(path_of_feature, bf.numpy())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "metadata": { - "id": "nyqH3zFwRPFi", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Preprocess and tokenize the captions\n", - "\n", - "* First, we'll tokenize the captions (e.g., by splitting on spaces). This will give us a vocabulary of all the unique words in the data (e.g., \"surfing\", \"football\", etc).\n", - "* Next, we'll limit the vocabulary size to the top 5,000 words to save memory. We'll replace all other words with the token \"UNK\" (for unknown).\n", - "* Finally, we create a word --> index mapping and vice-versa.\n", - "* We will then pad all sequences to the be same length as the longest one. 
" - ] - }, - { - "metadata": { - "id": "HZfK8RhQRPFj", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# This will find the maximum length of any caption in our dataset\n", - "def calc_max_length(tensor):\n", - " return max(len(t) for t in tensor)" - ], - "execution_count": 0, - "outputs": [] + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b6qbGw8MRPE5" + }, + "source": [ + "## Download and prepare the MS-COCO dataset\n", + "\n", + "We will use the [MS-COCO dataset](http://cocodataset.org/#home) to train our model. This dataset contains >82,000 images, each of which has been annotated with at least 5 different captions. The code below will download and extract the dataset automatically. \n", + "\n", + "**Caution: large download ahead**. We'll use the training set, it's a 13GB file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "oJGE34aiRPFo", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# The steps above is a general process of dealing with text processing\n", - "\n", - "# choosing the top 5000 words from the vocabulary\n", - "top_k = 5000\n", - "tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k, \n", - " oov_token=\"\", \n", - " filters='!\"#$%&()*+.,-/:;=?@[\\]^_`{|}~ ')\n", - "tokenizer.fit_on_texts(train_captions)\n", - "train_seqs = tokenizer.texts_to_sequences(train_captions)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "krQuPYTtRPE7" + }, + "outputs": [], + "source": [ + "annotation_zip = tf.keras.utils.get_file('captions.zip', \n", + " cache_subdir=os.path.abspath('.'),\n", + " origin = 
'http://images.cocodataset.org/annotations/annotations_trainval2014.zip',\n", + " extract = True)\n", + "annotation_file = os.path.dirname(annotation_zip)+'/annotations/captions_train2014.json'\n", + "\n", + "name_of_zip = 'train2014.zip'\n", + "if not os.path.exists(os.path.abspath('.') + '/' + name_of_zip):\n", + " image_zip = tf.keras.utils.get_file(name_of_zip, \n", + " cache_subdir=os.path.abspath('.'),\n", + " origin = 'http://images.cocodataset.org/zips/train2014.zip',\n", + " extract = True)\n", + " PATH = os.path.dirname(image_zip)+'/train2014/'\n", + "else:\n", + " PATH = os.path.abspath('.')+'/train2014/'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "aANEzb5WwSzg" + }, + "source": [ + "## Optionally, limit the size of the training set for faster training\n", + "For this example, we'll select a subset of 30,000 captions and use these and the corresponding images to train our model. As always, captioning quality will improve if you choose to use more data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "8Q44tNQVRPFt", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "tokenizer.word_index = {key:value for key, value in tokenizer.word_index.items() if value <= top_k}\n", - "# putting token in the word2idx dictionary\n", - "tokenizer.word_index[tokenizer.oov_token] = top_k + 1\n", - "tokenizer.word_index[''] = 0" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "4G3b8x8_RPFD" + }, + "outputs": [], + "source": [ + "# read the json file\n", + "with open(annotation_file, 'r') as f:\n", + " annotations = json.load(f)\n", + "\n", + "# storing the captions and the image name in vectors\n", + "all_captions = []\n", + "all_img_name_vector = []\n", + "\n", + "for annot in annotations['annotations']:\n", + " caption = ' ' + annot['caption'] + ' '\n", + " image_id = annot['image_id']\n", + " full_coco_image_path = PATH + 'COCO_train2014_' + '%012d.jpg' % (image_id)\n", + " \n", + " all_img_name_vector.append(full_coco_image_path)\n", + " all_captions.append(caption)\n", + "\n", + "# shuffling the captions and image_names together\n", + "# setting a random state\n", + "train_captions, img_name_vector = shuffle(all_captions,\n", + " all_img_name_vector,\n", + " random_state=1)\n", + "\n", + "# selecting the first 30000 captions from the shuffled set\n", + "num_examples = 30000\n", + "train_captions = train_captions[:num_examples]\n", + "img_name_vector = img_name_vector[:num_examples]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "0fpJb5ojRPFv", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - 
} - } - }, - "cell_type": "code", - "source": [ - "# creating the tokenized vectors\n", - "train_seqs = tokenizer.texts_to_sequences(train_captions)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "mPBMgK34RPFL" + }, + "outputs": [], + "source": [ + "len(train_captions), len(all_captions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8cSW4u-ORPFQ" + }, + "source": [ + "## Preprocess the images using InceptionV3\n", + "Next, we will use InceptionV3 (pretrained on Imagenet) to classify each image. We will extract features from the last convolutional layer. \n", + "\n", + "First, we will need to convert the images into the format inceptionV3 expects by:\n", + "* Resizing the image to (299, 299)\n", + "* Using the [preprocess_input](https://www.tensorflow.org/api_docs/python/tf/keras/applications/inception_v3/preprocess_input) method to place the pixels in the range of -1 to 1 (to match the format of the images used to train InceptionV3)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "olQArbgbRPF1", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# creating a reverse mapping (index -> word)\n", - "index_word = {value:key for key, value in tokenizer.word_index.items()}" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "zXR0217aRPFR" + }, + "outputs": [], + "source": [ + "def load_image(image_path):\n", + " img = tf.read_file(image_path)\n", + " img = tf.image.decode_jpeg(img, channels=3)\n", + " img = tf.image.resize_images(img, (299, 299))\n", + " img = tf.keras.applications.inception_v3.preprocess_input(img)\n", + " return img, image_path" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MDvIu4sXRPFV" + }, + "source": [ + "## Initialize InceptionV3 and load the pretrained Imagenet weights\n", + "\n", + "To do so, we'll create a tf.keras model where the output layer is the last convolutional layer in the InceptionV3 architecture. \n", + "* Each image is forwarded through the network and the vector that we get at the end is stored in a dictionary (image_name --> feature_vector). \n", + "* We use the last convolutional layer because we are using attention in this example. The shape of the output of this layer is ```8x8x2048```. \n", + "* We avoid doing this during training so it does not become a bottleneck. \n", + "* After all the images are passed through the network, we pickle the dictionary and save it to disk." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "AidglIZVRPF4", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# padding each vector to the max_length of the captions\n", - "# if the max_length parameter is not provided, pad_sequences calculates that automatically\n", - "cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "RD3vW4SsRPFW" + }, + "outputs": [], + "source": [ + "image_model = tf.keras.applications.InceptionV3(include_top=False, \n", + " weights='imagenet')\n", + "new_input = image_model.input\n", + "hidden_layer = image_model.layers[-1].output\n", + "\n", + "image_features_extract_model = tf.keras.Model(new_input, hidden_layer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "rERqlR3WRPGO" + }, + "source": [ + "## Caching the features extracted from InceptionV3\n", + "\n", + "We will pre-process each image with InceptionV3 and cache the output to disk. Caching the output in RAM would be faster but memory intensive, requiring 8 \\* 8 \\* 2048 floats per image. At the time of writing, this would exceed the memory limitations of Colab (although these may change, an instance appears to have about 12GB of memory currently). \n", + "\n", + "Performance could be improved with a more sophisticated caching strategy (e.g., by sharding the images to reduce random access disk I/O) at the cost of more code.\n", + "\n", + "This will take about 10 minutes to run in Colab with a GPU. 
If you'd like to see a progress bar, you could: install [tqdm](https://github.com/tqdm/tqdm) (```!pip install tqdm```), then change this line: \n", + "\n", + "```for img, path in image_dataset:``` \n", + "\n", + "to:\n", + "\n", + "```for img, path in tqdm(image_dataset):```." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "gL0wkttkRPGA", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# calculating the max_length \n", - "# used to store the attention weights\n", - "max_length = calc_max_length(train_seqs)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "Dx_fvbVgRPGQ" + }, + "outputs": [], + "source": [ + "# getting the unique images\n", + "encode_train = sorted(set(img_name_vector))\n", + "\n", + "# feel free to change the batch_size according to your system configuration\n", + "image_dataset = tf.data.Dataset.from_tensor_slices(\n", + " encode_train).map(load_image).batch(16)\n", + "\n", + "for img, path in image_dataset:\n", + " batch_features = image_features_extract_model(img)\n", + " batch_features = tf.reshape(batch_features, \n", + " (batch_features.shape[0], -1, batch_features.shape[3]))\n", + "\n", + " for bf, p in zip(batch_features, path):\n", + " path_of_feature = p.numpy().decode(\"utf-8\")\n", + " np.save(path_of_feature, bf.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nyqH3zFwRPFi" + }, + "source": [ + "## Preprocess and tokenize the captions\n", + "\n", + "* First, we'll tokenize the captions (e.g., by splitting on spaces). This will give us a vocabulary of all the unique words in the data (e.g., \"surfing\", \"football\", etc).\n", + "* Next, we'll limit the vocabulary size to the top 5,000 words to save memory. 
We'll replace all other words with the token \"UNK\" (for unknown).\n", + "* Finally, we create a word --> index mapping and vice-versa.\n", + "* We will then pad all sequences to the be same length as the longest one. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "M3CD75nDpvTI", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Split the data into training and testing" - ] + "colab_type": "code", + "id": "HZfK8RhQRPFj" + }, + "outputs": [], + "source": [ + "# This will find the maximum length of any caption in our dataset\n", + "def calc_max_length(tensor):\n", + " return max(len(t) for t in tensor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "iS7DDMszRPGF", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# Create training and validation sets using 80-20 split\n", - "img_name_train, img_name_val, cap_train, cap_val = train_test_split(img_name_vector, \n", - " cap_vector, \n", - " test_size=0.2, \n", - " random_state=0)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "oJGE34aiRPFo" + }, + "outputs": [], + "source": [ + "# The steps above is a general process of dealing with text processing\n", + "\n", + "# choosing the top 5000 words from the vocabulary\n", + "top_k = 5000\n", + "tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k, \n", + " oov_token=\"\", \n", + " filters='!\"#$%&()*+.,-/:;=?@[\\]^_`{|}~ ')\n", + "tokenizer.fit_on_texts(train_captions)\n", + "train_seqs = tokenizer.texts_to_sequences(train_captions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { 
+ "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "XmViPkRFRPGH", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "len(img_name_train), len(cap_train), len(img_name_val), len(cap_val)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "8Q44tNQVRPFt" + }, + "outputs": [], + "source": [ + "tokenizer.word_index[''] = 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "uEWM9xrYcg45", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Our images and captions are ready! Next, let's create a tf.data dataset to use for training our model.\n", - "\n" - ] + "colab_type": "code", + "id": "0fpJb5ojRPFv" + }, + "outputs": [], + "source": [ + "# creating the tokenized vectors\n", + "train_seqs = tokenizer.texts_to_sequences(train_captions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "Q3TnZ1ToRPGV", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# feel free to change these parameters according to your system's configuration\n", - "\n", - "BATCH_SIZE = 64\n", - "BUFFER_SIZE = 1000\n", - "embedding_dim = 256\n", - "units = 512\n", - "vocab_size = len(tokenizer.word_index)\n", - "# shape of the vector extracted from InceptionV3 is (64, 2048)\n", - "# these two variables represent that\n", - "features_shape = 2048\n", - "attention_features_shape = 64" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "olQArbgbRPF1" + }, + "outputs": [], + "source": [ + "# creating a reverse mapping (index -> word)\n", + "index_word 
= {value:key for key, value in tokenizer.word_index.items()}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "SmZS2N0bXG3T", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# loading the numpy files \n", - "def map_func(img_name, cap):\n", - " img_tensor = np.load(img_name.decode('utf-8')+'.npy')\n", - " return img_tensor, cap" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "AidglIZVRPF4" + }, + "outputs": [], + "source": [ + "# padding each vector to the max_length of the captions\n", + "# if the max_length parameter is not provided, pad_sequences calculates that automatically\n", + "cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "FDF_Nm3tRPGZ", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "dataset = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train))\n", - "\n", - "# using map to load the numpy files in parallel\n", - "# NOTE: Be sure to set num_parallel_calls to the number of CPU cores you have\n", - "# https://www.tensorflow.org/api_docs/python/tf/py_func\n", - "dataset = dataset.map(lambda item1, item2: tf.py_func(\n", - " map_func, [item1, item2], [tf.float32, tf.int32]), num_parallel_calls=8)\n", - "\n", - "# shuffling and batching\n", - "dataset = dataset.shuffle(BUFFER_SIZE)\n", - "# https://www.tensorflow.org/api_docs/python/tf/contrib/data/batch_and_drop_remainder\n", - "dataset = dataset.batch(BATCH_SIZE)\n", - "dataset = dataset.prefetch(1)" - ], - 
"execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "gL0wkttkRPGA" + }, + "outputs": [], + "source": [ + "# calculating the max_length \n", + "# used to store the attention weights\n", + "max_length = calc_max_length(train_seqs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "M3CD75nDpvTI" + }, + "source": [ + "## Split the data into training and testing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "nrvoDphgRPGd", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Model\n", - "\n", - "Fun fact, the decoder below is identical to the one in the example for [Neural Machine Translation with Attention]( https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb).\n", - "\n", - "The model architecture is inspired by the [Show, Attend and Tell](https://arxiv.org/pdf/1502.03044.pdf) paper.\n", - "\n", - "* In this example, we extract the features from the lower convolutional layer of InceptionV3 giving us a vector of shape (8, 8, 2048). \n", - "* We squash that to a shape of (64, 2048).\n", - "* This vector is then passed through the CNN Encoder(which consists of a single Fully connected layer).\n", - "* The RNN(here GRU) attends over the image to predict the next word." 
- ] + "colab_type": "code", + "id": "iS7DDMszRPGF" + }, + "outputs": [], + "source": [ + "# Create training and validation sets using 80-20 split\n", + "img_name_train, img_name_val, cap_train, cap_val = train_test_split(img_name_vector, \n", + " cap_vector, \n", + " test_size=0.2, \n", + " random_state=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "AAppCGLKRPGd", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "def gru(units):\n", - " # If you have a GPU, we recommend using the CuDNNGRU layer (it provides a \n", - " # significant speedup).\n", - " if tf.test.is_gpu_available():\n", - " return tf.keras.layers.CuDNNGRU(units, \n", - " return_sequences=True, \n", - " return_state=True, \n", - " recurrent_initializer='glorot_uniform')\n", - " else:\n", - " return tf.keras.layers.GRU(units, \n", - " return_sequences=True, \n", - " return_state=True, \n", - " recurrent_activation='sigmoid', \n", - " recurrent_initializer='glorot_uniform')" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "XmViPkRFRPGH" + }, + "outputs": [], + "source": [ + "len(img_name_train), len(cap_train), len(img_name_val), len(cap_val)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "uEWM9xrYcg45" + }, + "source": [ + "## Our images and captions are ready! 
Next, let's create a tf.data dataset to use for training our model.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "ja2LFTMSdeV3", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "class BahdanauAttention(tf.keras.Model):\n", - " def __init__(self, units):\n", - " super(BahdanauAttention, self).__init__()\n", - " self.W1 = tf.keras.layers.Dense(units)\n", - " self.W2 = tf.keras.layers.Dense(units)\n", - " self.V = tf.keras.layers.Dense(1)\n", - " \n", - " def call(self, features, hidden):\n", - " # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim)\n", - " \n", - " # hidden shape == (batch_size, hidden_size)\n", - " # hidden_with_time_axis shape == (batch_size, 1, hidden_size)\n", - " hidden_with_time_axis = tf.expand_dims(hidden, 1)\n", - " \n", - " # score shape == (batch_size, 64, hidden_size)\n", - " score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))\n", - " \n", - " # attention_weights shape == (batch_size, 64, 1)\n", - " # we get 1 at the last axis because we are applying score to self.V\n", - " attention_weights = tf.nn.softmax(self.V(score), axis=1)\n", - " \n", - " # context_vector shape after sum == (batch_size, hidden_size)\n", - " context_vector = attention_weights * features\n", - " context_vector = tf.reduce_sum(context_vector, axis=1)\n", - " \n", - " return context_vector, attention_weights" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "Q3TnZ1ToRPGV" + }, + "outputs": [], + "source": [ + "# feel free to change these parameters according to your system's configuration\n", + "\n", + "BATCH_SIZE = 64\n", + "BUFFER_SIZE = 1000\n", + "embedding_dim = 256\n", + "units = 512\n", + "vocab_size = len(tokenizer.word_index)\n", + "# shape of the 
vector extracted from InceptionV3 is (64, 2048)\n", + "# these two variables represent that\n", + "features_shape = 2048\n", + "attention_features_shape = 64" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "AZ7R1RxHRPGf", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "class CNN_Encoder(tf.keras.Model):\n", - " # Since we have already extracted the features and dumped it using pickle\n", - " # This encoder passes those features through a Fully connected layer\n", - " def __init__(self, embedding_dim):\n", - " super(CNN_Encoder, self).__init__()\n", - " # shape after fc == (batch_size, 64, embedding_dim)\n", - " self.fc = tf.keras.layers.Dense(embedding_dim)\n", - " \n", - " def call(self, x):\n", - " x = self.fc(x)\n", - " x = tf.nn.relu(x)\n", - " return x" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "SmZS2N0bXG3T" + }, + "outputs": [], + "source": [ + "# loading the numpy files \n", + "def map_func(img_name, cap):\n", + " img_tensor = np.load(img_name.decode('utf-8')+'.npy')\n", + " return img_tensor, cap" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "V9UbGQmERPGi", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "class RNN_Decoder(tf.keras.Model):\n", - " def __init__(self, embedding_dim, units, vocab_size):\n", - " super(RNN_Decoder, self).__init__()\n", - " self.units = units\n", - "\n", - " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", - " self.gru = gru(self.units)\n", - " self.fc1 = tf.keras.layers.Dense(self.units)\n", - " self.fc2 
= tf.keras.layers.Dense(vocab_size)\n", - " \n", - " self.attention = BahdanauAttention(self.units)\n", - " \n", - " def call(self, x, features, hidden):\n", - " # defining attention as a separate model\n", - " context_vector, attention_weights = self.attention(features, hidden)\n", - " \n", - " # x shape after passing through embedding == (batch_size, 1, embedding_dim)\n", - " x = self.embedding(x)\n", - " \n", - " # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)\n", - " x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)\n", - " \n", - " # passing the concatenated vector to the GRU\n", - " output, state = self.gru(x)\n", - " \n", - " # shape == (batch_size, max_length, hidden_size)\n", - " x = self.fc1(output)\n", - " \n", - " # x shape == (batch_size * max_length, hidden_size)\n", - " x = tf.reshape(x, (-1, x.shape[2]))\n", - " \n", - " # output shape == (batch_size * max_length, vocab)\n", - " x = self.fc2(x)\n", - "\n", - " return x, state, attention_weights\n", - "\n", - " def reset_state(self, batch_size):\n", - " return tf.zeros((batch_size, self.units))" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "FDF_Nm3tRPGZ" + }, + "outputs": [], + "source": [ + "dataset = tf.data.Dataset.from_tensor_slices((img_name_train, cap_train))\n", + "\n", + "# using map to load the numpy files in parallel\n", + "# NOTE: Be sure to set num_parallel_calls to the number of CPU cores you have\n", + "# https://www.tensorflow.org/api_docs/python/tf/py_func\n", + "dataset = dataset.map(lambda item1, item2: tf.py_func(\n", + " map_func, [item1, item2], [tf.float32, tf.int32]), num_parallel_calls=8)\n", + "\n", + "# shuffling and batching\n", + "dataset = dataset.shuffle(BUFFER_SIZE)\n", + "# https://www.tensorflow.org/api_docs/python/tf/contrib/data/batch_and_drop_remainder\n", + "dataset = dataset.batch(BATCH_SIZE)\n", + "dataset = dataset.prefetch(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + 
"colab_type": "text", + "id": "nrvoDphgRPGd" + }, + "source": [ + "## Model\n", + "\n", + "Fun fact, the decoder below is identical to the one in the example for [Neural Machine Translation with Attention]( https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb).\n", + "\n", + "The model architecture is inspired by the [Show, Attend and Tell](https://arxiv.org/pdf/1502.03044.pdf) paper.\n", + "\n", + "* In this example, we extract the features from the lower convolutional layer of InceptionV3 giving us a vector of shape (8, 8, 2048). \n", + "* We squash that to a shape of (64, 2048).\n", + "* This vector is then passed through the CNN Encoder(which consists of a single Fully connected layer).\n", + "* The RNN(here GRU) attends over the image to predict the next word." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "Qs_Sr03wRPGk", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "encoder = CNN_Encoder(embedding_dim)\n", - "decoder = RNN_Decoder(embedding_dim, units, vocab_size)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "AAppCGLKRPGd" + }, + "outputs": [], + "source": [ + "def gru(units):\n", + " # If you have a GPU, we recommend using the CuDNNGRU layer (it provides a \n", + " # significant speedup).\n", + " if tf.test.is_gpu_available():\n", + " return tf.keras.layers.CuDNNGRU(units, \n", + " return_sequences=True, \n", + " return_state=True, \n", + " recurrent_initializer='glorot_uniform')\n", + " else:\n", + " return tf.keras.layers.GRU(units, \n", + " return_sequences=True, \n", + " return_state=True, \n", + " recurrent_activation='sigmoid', \n", + " recurrent_initializer='glorot_uniform')" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "-bYN7xA0RPGl", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "optimizer = tf.train.AdamOptimizer()\n", - "\n", - "# We are masking the loss calculated for padding\n", - "def loss_function(real, pred):\n", - " mask = 1 - np.equal(real, 0)\n", - " loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask\n", - " return tf.reduce_mean(loss_)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "ja2LFTMSdeV3" + }, + "outputs": [], + "source": [ + "class BahdanauAttention(tf.keras.Model):\n", + " def __init__(self, units):\n", + " super(BahdanauAttention, self).__init__()\n", + " self.W1 = tf.keras.layers.Dense(units)\n", + " self.W2 = tf.keras.layers.Dense(units)\n", + " self.V = tf.keras.layers.Dense(1)\n", + " \n", + " def call(self, features, hidden):\n", + " # features(CNN_encoder output) shape == (batch_size, 64, embedding_dim)\n", + " \n", + " # hidden shape == (batch_size, hidden_size)\n", + " # hidden_with_time_axis shape == (batch_size, 1, hidden_size)\n", + " hidden_with_time_axis = tf.expand_dims(hidden, 1)\n", + " \n", + " # score shape == (batch_size, 64, hidden_size)\n", + " score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))\n", + " \n", + " # attention_weights shape == (batch_size, 64, 1)\n", + " # we get 1 at the last axis because we are applying score to self.V\n", + " attention_weights = tf.nn.softmax(self.V(score), axis=1)\n", + " \n", + " # context_vector shape after sum == (batch_size, hidden_size)\n", + " context_vector = attention_weights * features\n", + " context_vector = tf.reduce_sum(context_vector, axis=1)\n", + " \n", + " return context_vector, attention_weights" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "PHod7t72RPGn", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Training\n", - "\n", - "* We extract the features stored in the respective `.npy` files and then pass those features through the encoder.\n", - "* The encoder output, hidden state(initialized to 0) and the decoder input (which is the start token) is passed to the decoder.\n", - "* The decoder returns the predictions and the decoder hidden state.\n", - "* The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss.\n", - "* Use teacher forcing to decide the next input to the decoder.\n", - "* Teacher forcing is the technique where the target word is passed as the next input to the decoder.\n", - "* The final step is to calculate the gradients and apply it to the optimizer and backpropagate.\n" - ] + "colab_type": "code", + "id": "AZ7R1RxHRPGf" + }, + "outputs": [], + "source": [ + "class CNN_Encoder(tf.keras.Model):\n", + " # Since we have already extracted the features and dumped it using pickle\n", + " # This encoder passes those features through a Fully connected layer\n", + " def __init__(self, embedding_dim):\n", + " super(CNN_Encoder, self).__init__()\n", + " # shape after fc == (batch_size, 64, embedding_dim)\n", + " self.fc = tf.keras.layers.Dense(embedding_dim)\n", + " \n", + " def call(self, x):\n", + " x = self.fc(x)\n", + " x = tf.nn.relu(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "Vt4WZ5mhJE-E", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# adding this in a separate cell because if you run the training cell 
\n", - "# many times, the loss_plot array will be reset\n", - "loss_plot = []" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "V9UbGQmERPGi" + }, + "outputs": [], + "source": [ + "class RNN_Decoder(tf.keras.Model):\n", + " def __init__(self, embedding_dim, units, vocab_size):\n", + " super(RNN_Decoder, self).__init__()\n", + " self.units = units\n", + "\n", + " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", + " self.gru = gru(self.units)\n", + " self.fc1 = tf.keras.layers.Dense(self.units)\n", + " self.fc2 = tf.keras.layers.Dense(vocab_size)\n", + " \n", + " self.attention = BahdanauAttention(self.units)\n", + " \n", + " def call(self, x, features, hidden):\n", + " # defining attention as a separate model\n", + " context_vector, attention_weights = self.attention(features, hidden)\n", + " \n", + " # x shape after passing through embedding == (batch_size, 1, embedding_dim)\n", + " x = self.embedding(x)\n", + " \n", + " # x shape after concatenation == (batch_size, 1, embedding_dim + hidden_size)\n", + " x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)\n", + " \n", + " # passing the concatenated vector to the GRU\n", + " output, state = self.gru(x)\n", + " \n", + " # shape == (batch_size, max_length, hidden_size)\n", + " x = self.fc1(output)\n", + " \n", + " # x shape == (batch_size * max_length, hidden_size)\n", + " x = tf.reshape(x, (-1, x.shape[2]))\n", + " \n", + " # output shape == (batch_size * max_length, vocab)\n", + " x = self.fc2(x)\n", + "\n", + " return x, state, attention_weights\n", + "\n", + " def reset_state(self, batch_size):\n", + " return tf.zeros((batch_size, self.units))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "UlA4VIQpRPGo", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - 
"cell_type": "code", - "source": [ - "EPOCHS = 20\n", - "\n", - "for epoch in range(EPOCHS):\n", - " start = time.time()\n", - " total_loss = 0\n", - " \n", - " for (batch, (img_tensor, target)) in enumerate(dataset):\n", - " loss = 0\n", - " \n", - " # initializing the hidden state for each batch\n", - " # because the captions are not related from image to image\n", - " hidden = decoder.reset_state(batch_size=target.shape[0])\n", - "\n", - " dec_input = tf.expand_dims([tokenizer.word_index['']] * BATCH_SIZE, 1)\n", - " \n", - " with tf.GradientTape() as tape:\n", - " features = encoder(img_tensor)\n", - " \n", - " for i in range(1, target.shape[1]):\n", - " # passing the features through the decoder\n", - " predictions, hidden, _ = decoder(dec_input, features, hidden)\n", - "\n", - " loss += loss_function(target[:, i], predictions)\n", - " \n", - " # using teacher forcing\n", - " dec_input = tf.expand_dims(target[:, i], 1)\n", - " \n", - " total_loss += (loss / int(target.shape[1]))\n", - " \n", - " variables = encoder.variables + decoder.variables\n", - " \n", - " gradients = tape.gradient(loss, variables) \n", - " \n", - " optimizer.apply_gradients(zip(gradients, variables), tf.train.get_or_create_global_step())\n", - " \n", - " if batch % 100 == 0:\n", - " print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, \n", - " batch, \n", - " loss.numpy() / int(target.shape[1])))\n", - " # storing the epoch end loss value to plot later\n", - " loss_plot.append(total_loss / len(cap_vector))\n", - " \n", - " print ('Epoch {} Loss {:.6f}'.format(epoch + 1, \n", - " total_loss/len(cap_vector)))\n", - " print ('Time taken for 1 epoch {} sec\\n'.format(time.time() - start))" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "Qs_Sr03wRPGk" + }, + "outputs": [], + "source": [ + "encoder = CNN_Encoder(embedding_dim)\n", + "decoder = RNN_Decoder(embedding_dim, units, vocab_size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "1Wm83G-ZBPcC", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "plt.plot(loss_plot)\n", - "plt.xlabel('Epochs')\n", - "plt.ylabel('Loss')\n", - "plt.title('Loss Plot')\n", - "plt.show()" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "-bYN7xA0RPGl" + }, + "outputs": [], + "source": [ + "optimizer = tf.train.AdamOptimizer()\n", + "\n", + "# We are masking the loss calculated for padding\n", + "def loss_function(real, pred):\n", + " mask = 1 - np.equal(real, 0)\n", + " loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=real, logits=pred) * mask\n", + " return tf.reduce_mean(loss_)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "PHod7t72RPGn" + }, + "source": [ + "## Training\n", + "\n", + "* We extract the features stored in the respective `.npy` files and then pass those features through the encoder.\n", + "* The encoder output, hidden state(initialized to 0) and the decoder input (which is the start token) is passed to the decoder.\n", + "* The decoder returns the predictions and the decoder hidden state.\n", + "* The decoder hidden state is then passed back into the model and the predictions are used to calculate the loss.\n", + "* Use teacher forcing to decide the next input to the decoder.\n", + "* Teacher forcing is the technique where the target word is passed as the next input to the decoder.\n", + "* The final step is to calculate the gradients and apply it to the optimizer and backpropagate.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "xGvOcLQKghXN", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - 
"## Caption!\n", - "\n", - "* The evaluate function is similar to the training loop, except we don't use teacher forcing here. The input to the decoder at each time step is its previous predictions along with the hidden state and the encoder output.\n", - "* Stop predicting when the model predicts the end token.\n", - "* And store the attention weights for every time step." - ] + "colab_type": "code", + "id": "Vt4WZ5mhJE-E" + }, + "outputs": [], + "source": [ + "# adding this in a separate cell because if you run the training cell \n", + "# many times, the loss_plot array will be reset\n", + "loss_plot = []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "RCWpDtyNRPGs", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "def evaluate(image):\n", - " attention_plot = np.zeros((max_length, attention_features_shape))\n", - "\n", - " hidden = decoder.reset_state(batch_size=1)\n", - "\n", - " temp_input = tf.expand_dims(load_image(image)[0], 0)\n", - " img_tensor_val = image_features_extract_model(temp_input)\n", - " img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))\n", - "\n", - " features = encoder(img_tensor_val)\n", - "\n", - " dec_input = tf.expand_dims([tokenizer.word_index['']], 0)\n", - " result = []\n", - "\n", - " for i in range(max_length):\n", - " predictions, hidden, attention_weights = decoder(dec_input, features, hidden)\n", - "\n", - " attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()\n", - "\n", - " predicted_id = tf.argmax(predictions[0]).numpy()\n", - " result.append(index_word[predicted_id])\n", - "\n", - " if index_word[predicted_id] == '':\n", - " return result, attention_plot\n", - "\n", - " dec_input = tf.expand_dims([predicted_id], 0)\n", - "\n", - " 
attention_plot = attention_plot[:len(result), :]\n", - " return result, attention_plot" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "UlA4VIQpRPGo" + }, + "outputs": [], + "source": [ + "EPOCHS = 20\n", + "\n", + "for epoch in range(EPOCHS):\n", + " start = time.time()\n", + " total_loss = 0\n", + " \n", + " for (batch, (img_tensor, target)) in enumerate(dataset):\n", + " loss = 0\n", + " \n", + " # initializing the hidden state for each batch\n", + " # because the captions are not related from image to image\n", + " hidden = decoder.reset_state(batch_size=target.shape[0])\n", + "\n", + " dec_input = tf.expand_dims([tokenizer.word_index['']] * BATCH_SIZE, 1)\n", + " \n", + " with tf.GradientTape() as tape:\n", + " features = encoder(img_tensor)\n", + " \n", + " for i in range(1, target.shape[1]):\n", + " # passing the features through the decoder\n", + " predictions, hidden, _ = decoder(dec_input, features, hidden)\n", + "\n", + " loss += loss_function(target[:, i], predictions)\n", + " \n", + " # using teacher forcing\n", + " dec_input = tf.expand_dims(target[:, i], 1)\n", + " \n", + " total_loss += (loss / int(target.shape[1]))\n", + " \n", + " variables = encoder.variables + decoder.variables\n", + " \n", + " gradients = tape.gradient(loss, variables) \n", + " \n", + " optimizer.apply_gradients(zip(gradients, variables), tf.train.get_or_create_global_step())\n", + " \n", + " if batch % 100 == 0:\n", + " print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, \n", + " batch, \n", + " loss.numpy() / int(target.shape[1])))\n", + " # storing the epoch end loss value to plot later\n", + " loss_plot.append(total_loss / len(cap_vector))\n", + " \n", + " print ('Epoch {} Loss {:.6f}'.format(epoch + 1, \n", + " total_loss/len(cap_vector)))\n", + " print ('Time taken for 1 epoch {} sec\\n'.format(time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": 
false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "fD_y7PD6RPGt", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "def plot_attention(image, result, attention_plot):\n", - " temp_image = np.array(Image.open(image))\n", - "\n", - " fig = plt.figure(figsize=(10, 10))\n", - " \n", - " len_result = len(result)\n", - " for l in range(len_result):\n", - " temp_att = np.resize(attention_plot[l], (8, 8))\n", - " ax = fig.add_subplot(len_result//2, len_result//2, l+1)\n", - " ax.set_title(result[l])\n", - " img = ax.imshow(temp_image)\n", - " ax.imshow(temp_att, cmap='gray', alpha=0.6, extent=img.get_extent())\n", - "\n", - " plt.tight_layout()\n", - " plt.show()" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "1Wm83G-ZBPcC" + }, + "outputs": [], + "source": [ + "plt.plot(loss_plot)\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.title('Loss Plot')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xGvOcLQKghXN" + }, + "source": [ + "## Caption!\n", + "\n", + "* The evaluate function is similar to the training loop, except we don't use teacher forcing here. The input to the decoder at each time step is its previous predictions along with the hidden state and the encoder output.\n", + "* Stop predicting when the model predicts the end token.\n", + "* And store the attention weights for every time step." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "io7ws3ReRPGv", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "# captions on the validation set\n", - "rid = np.random.randint(0, len(img_name_val))\n", - "image = img_name_val[rid]\n", - "real_caption = ' '.join([index_word[i] for i in cap_val[rid] if i not in [0]])\n", - "result, attention_plot = evaluate(image)\n", - "\n", - "print ('Real Caption:', real_caption)\n", - "print ('Prediction Caption:', ' '.join(result))\n", - "plot_attention(image, result, attention_plot)\n", - "# opening the image\n", - "Image.open(img_name_val[rid])" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "RCWpDtyNRPGs" + }, + "outputs": [], + "source": [ + "def evaluate(image):\n", + " attention_plot = np.zeros((max_length, attention_features_shape))\n", + "\n", + " hidden = decoder.reset_state(batch_size=1)\n", + "\n", + " temp_input = tf.expand_dims(load_image(image)[0], 0)\n", + " img_tensor_val = image_features_extract_model(temp_input)\n", + " img_tensor_val = tf.reshape(img_tensor_val, (img_tensor_val.shape[0], -1, img_tensor_val.shape[3]))\n", + "\n", + " features = encoder(img_tensor_val)\n", + "\n", + " dec_input = tf.expand_dims([tokenizer.word_index['']], 0)\n", + " result = []\n", + "\n", + " for i in range(max_length):\n", + " predictions, hidden, attention_weights = decoder(dec_input, features, hidden)\n", + "\n", + " attention_plot[i] = tf.reshape(attention_weights, (-1, )).numpy()\n", + "\n", + " predicted_id = tf.argmax(predictions[0]).numpy()\n", + " result.append(index_word[predicted_id])\n", + "\n", + " if index_word[predicted_id] == '':\n", + " return result, attention_plot\n", + "\n", + " dec_input = tf.expand_dims([predicted_id], 0)\n", + "\n", + " 
attention_plot = attention_plot[:len(result), :]\n", + " return result, attention_plot" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "Rprk3HEvZuxb", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "## Try it on your own images\n", - "For fun, below we've provided a method you can use to caption your own images with the model we've just trained. Keep in mind, it was trained on a relatively small amount of data, and your images may be different from the training data (so be prepared for weird results!)\n" - ] + "colab_type": "code", + "id": "fD_y7PD6RPGt" + }, + "outputs": [], + "source": [ + "def plot_attention(image, result, attention_plot):\n", + " temp_image = np.array(Image.open(image))\n", + "\n", + " fig = plt.figure(figsize=(10, 10))\n", + " \n", + " len_result = len(result)\n", + " for l in range(len_result):\n", + " temp_att = np.resize(attention_plot[l], (8, 8))\n", + " ax = fig.add_subplot(len_result//2, len_result//2, l+1)\n", + " ax.set_title(result[l])\n", + " img = ax.imshow(temp_image)\n", + " ax.imshow(temp_att, cmap='gray', alpha=0.6, extent=img.get_extent())\n", + "\n", + " plt.tight_layout()\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, - { - "metadata": { - "id": "9Psd1quzaAWg", - "colab_type": "code", - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - } - }, - "cell_type": "code", - "source": [ - "image_url = 'https://tensorflow.org/images/surf.jpg'\n", - "image_extension = image_url[-4:]\n", - "image_path = tf.keras.utils.get_file('image'+image_extension, \n", - " origin=image_url)\n", - "\n", - "result, attention_plot = evaluate(image_path)\n", - "print ('Prediction Caption:', ' '.join(result))\n", - "plot_attention(image_path, 
result, attention_plot)\n", - "# opening the image\n", - "Image.open(image_path)" - ], - "execution_count": 0, - "outputs": [] + "colab_type": "code", + "id": "io7ws3ReRPGv" + }, + "outputs": [], + "source": [ + "# captions on the validation set\n", + "rid = np.random.randint(0, len(img_name_val))\n", + "image = img_name_val[rid]\n", + "real_caption = ' '.join([index_word[i] for i in cap_val[rid] if i not in [0]])\n", + "result, attention_plot = evaluate(image)\n", + "\n", + "print ('Real Caption:', real_caption)\n", + "print ('Prediction Caption:', ' '.join(result))\n", + "plot_attention(image, result, attention_plot)\n", + "# opening the image\n", + "Image.open(img_name_val[rid])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Rprk3HEvZuxb" + }, + "source": [ + "## Try it on your own images\n", + "For fun, below we've provided a method you can use to caption your own images with the model we've just trained. Keep in mind, it was trained on a relatively small amount of data, and your images may be different from the training data (so be prepared for weird results!)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "autoexec": { + "startup": false, + "wait_interval": 0 + } }, + "colab_type": "code", + "id": "9Psd1quzaAWg" + }, + "outputs": [], + "source": [ + "image_url = 'https://tensorflow.org/images/surf.jpg'\n", + "image_extension = image_url[-4:]\n", + "image_path = tf.keras.utils.get_file('image'+image_extension, \n", + " origin=image_url)\n", + "\n", + "result, attention_plot = evaluate(image_path)\n", + "print ('Prediction Caption:', ' '.join(result))\n", + "plot_attention(image_path, result, attention_plot)\n", + "# opening the image\n", + "Image.open(image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VJZXyJco6uLO" + }, + "source": [ + "# Next steps\n", + "\n", + "Congrats! 
You've just trained an image captioning model with attention. Next, we recommend taking a look at this example [Neural Machine Translation with Attention]( https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb). It uses a similar architecture to translate between Spanish and English sentences. You can also experiment with training the code in this notebook on a different dataset." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "default_view": {}, + "name": "image_captioning_with_attention.ipynb", + "private_outputs": true, + "provenance": [ { - "metadata": { - "id": "VJZXyJco6uLO", - "colab_type": "text" - }, - "cell_type": "markdown", - "source": [ - "# Next steps\n", - "\n", - "Congrats! You've just trained an image captioning model with attention. Next, we recommend taking a look at this example [Neural Machine Translation with Attention]( https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb). It uses a similar architecture to translate between Spanish and English sentences. You can also experiment with training the code in this notebook on a different dataset." 
- ] + "file_id": "1HI8OK2sMjcx9CTWVn0122QAHOuXaOaMg", + "timestamp": 1530222436922 } - ] + ], + "toc_visible": true, + "version": "0.3.2", + "views": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 } -- GitLab From 090cb450e25f14942e70c53d0d82ea8f9d164d57 Mon Sep 17 00:00:00 2001 From: Bhavani Subramanian Date: Mon, 5 Nov 2018 15:30:04 -0800 Subject: [PATCH 0148/1078] Fix for build failure (#424) Temporarily merging fix into our master so testing can progress --- tensorflow/core/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index afe4c46c8e..26dd295d0c 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -383,6 +383,7 @@ cc_library( ":lib_platform", ":platform_base", "//tensorflow/core/platform/default/build_config:port", + "@com_google_absl//absl/base", "@snappy", ], ) -- GitLab From 88026690778a4960c23019d13572f0f346f19916 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Tue, 6 Nov 2018 21:13:00 +0800 Subject: [PATCH 0149/1078] update mkl_softmax comments Change-Id: I95428c0e1d4df73f984b3b1f0e9770ec14688dd1 --- tensorflow/core/kernels/mkl_softmax_op.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index 6ff27b1957..c8b78f6187 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -50,8 +50,6 @@ class MklSoftmaxOp : public OpKernel { // src_tensor now points to the 0-th input of global data struct "context" size_t src_idx = 0; const Tensor& src_tensor = MklGetInput(context, src_idx); - 
//const int input_dims = src_tensor.dims(); - // printf("input_dims = %d\n", input_dims); // Add: get MklShape MklDnnShape src_mkl_shape; GetMklShape(context, src_idx, &src_mkl_shape); @@ -122,6 +120,8 @@ class MklSoftmaxOp : public OpKernel { // creating a memory descriptor // passing outermost dim as default axis, where the softmax is applied + // If axis is not the last dimension, python op will do a transpose so that we can + // still perform softmax on its last dimension. int axis = input_dims - 1; auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring, src.GetOpMemDesc(), axis); -- GitLab From ea684a74937fdf18ade43e42b9b320118f70d3c1 Mon Sep 17 00:00:00 2001 From: joaak <29533036+joaak@users.noreply.github.com> Date: Tue, 6 Nov 2018 16:49:18 -0500 Subject: [PATCH 0150/1078] replace index_word with tokenizer.index_word --- .../image_captioning_with_attention.ipynb | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb index 09ea021c44..12c5eff2b4 100644 --- a/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb +++ b/tensorflow/contrib/eager/python/examples/generative_examples/image_captioning_with_attention.ipynb @@ -441,25 +441,6 @@ "train_seqs = tokenizer.texts_to_sequences(train_captions)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "colab": { - "autoexec": { - "startup": false, - "wait_interval": 0 - } - }, - "colab_type": "code", - "id": "olQArbgbRPF1" - }, - "outputs": [], - "source": [ - "# creating a reverse mapping (index -> word)\n", - "index_word = {value:key for key, value in tokenizer.word_index.items()}" - ] - }, { "cell_type": "code", "execution_count": null, @@ -1031,9 +1012,9 @@ " attention_plot[i] = 
tf.reshape(attention_weights, (-1, )).numpy()\n", "\n", " predicted_id = tf.argmax(predictions[0]).numpy()\n", - " result.append(index_word[predicted_id])\n", + " result.append(tokenizer.index_word[predicted_id])\n", "\n", - " if index_word[predicted_id] == '':\n", + " if tokenizer.index_word[predicted_id] == '':\n", " return result, attention_plot\n", "\n", " dec_input = tf.expand_dims([predicted_id], 0)\n", @@ -1092,7 +1073,7 @@ "# captions on the validation set\n", "rid = np.random.randint(0, len(img_name_val))\n", "image = img_name_val[rid]\n", - "real_caption = ' '.join([index_word[i] for i in cap_val[rid] if i not in [0]])\n", + "real_caption = ' '.join([tokenizer.index_word[i] for i in cap_val[rid] if i not in [0]])\n", "result, attention_plot = evaluate(image)\n", "\n", "print ('Real Caption:', real_caption)\n", -- GitLab From 7f642e5afd7ddaad5215958ce3f22523ccb08a9c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 27 Nov 2017 11:28:59 -0800 Subject: [PATCH 0151/1078] Fix issue in tf.nn.softmax where negative dims could only be -1 This fix tries to address the issue raised in 14916 where negative dims could only be -1 in tf.nn.softmax. The issue was that dims=-1 was handled as a case of "last dim" with `is_last_dim = (dim is -1) or (dim == shape.ndims - 1)` but the generic negative dims were never processed. This fix adds `dim += shape.ndims` for generic negative dims. This fix fixes 14916. Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index bc195993c2..0b6d8e836f 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1698,6 +1698,10 @@ def _softmax(logits, compute_op, dim=-1, name=None): # If dim is not the last dimension, we have to do a transpose so that we can # still perform softmax on its last dimension. 
+ # In case dim is negative (and is not last dimension -1), add shape.ndims + if dim < 0: + dim += shape.ndims + # Swap logits' dimension of dim and its last dimension. input_rank = array_ops.rank(logits) dim_axis = dim % shape.ndims -- GitLab From e459d7ed9e843d2e6cad5cee2cfd0cbeb9d0c462 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 27 Nov 2017 11:35:21 -0800 Subject: [PATCH 0152/1078] Add test case for negative dims (other than -1) for tf.nn.softmax Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/softmax_op_test.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index ef9301d4e3..c87b6728aa 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -200,6 +200,15 @@ class SoftmaxTest(test.TestCase): use_gpu=False) self._testOverflow(use_gpu=False) + def testAlongNegativeDimension(self): + self._testSoftmax( + np.array([[[1., 1., 1., 1.], [1., 2., 3., 4.]], + [[2., 3., 4., 5.], [6., 7., 8., 9.]], + [[5., 4., 3., 2.], [1., 2., 3., 4.]]]).astype(np.float32), + dim=-2, + use_gpu=False) + self._testOverflow(use_gpu=False) + def testShapeInference(self): op = nn_ops.softmax([[[1., 1., 1., 1.], [1., 2., 3., 4.]], [[2., 3., 4., 5.], [6., 7., 8., 9.]], -- GitLab From 2ec6dcb7fe33ffac1dc55b9d7f6f23c417cb3dc1 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Tue, 6 Nov 2018 23:12:59 +0000 Subject: [PATCH 0153/1078] Fix broken test Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 0b6d8e836f..a2305cefba 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1699,7 +1699,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): # still perform softmax on its last dimension. 
# In case dim is negative (and is not last dimension -1), add shape.ndims - if dim < 0: + if not isinstance(dim, ops.Tensor) and dim < 0: dim += shape.ndims # Swap logits' dimension of dim and its last dimension. -- GitLab From 47cbd92e296ba18149fb78b87726426475fcd2f4 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 7 Nov 2018 09:52:33 +0800 Subject: [PATCH 0154/1078] 'op name' -> 'op type' --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 65bd568f6c..e6c3916e5d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2669,7 +2669,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( &transpose_nchw_in); // We use same name as original node, but change the op - // name. + // type. NodeBuilder nb(mklop->name(), mklop->type_string()); for (int i = 0; i < mklop_num_inputs; i++) { -- GitLab From 64329f85785c190467798f25a746291a813f192f Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 7 Nov 2018 09:56:58 +0800 Subject: [PATCH 0155/1078] Add a comment: storing the output slots of input nodes. --- tensorflow/core/graph/mkl_layout_pass.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index e6c3916e5d..01f1aa9078 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2672,6 +2672,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( // type. NodeBuilder nb(mklop->name(), mklop->type_string()); + // Storing the output slots of the input nodes. 
for (int i = 0; i < mklop_num_inputs; i++) { if (mklop_in[i].first == transpose_to_nhwc) { // Fill "x": -- GitLab From 8e4ec9ae62135adbc523470af1546c178a7f97c5 Mon Sep 17 00:00:00 2001 From: frreiss Date: Tue, 6 Nov 2018 12:55:29 -0800 Subject: [PATCH 0156/1078] Add missing random seed field to OrderedEnqueuer Fix whitespace Simplify changes Simplify changeset --- tensorflow/python/keras/utils/data_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/utils/data_utils.py b/tensorflow/python/keras/utils/data_utils.py index 01a9d61a84..8e36d4dea7 100644 --- a/tensorflow/python/keras/utils/data_utils.py +++ b/tensorflow/python/keras/utils/data_utils.py @@ -598,7 +598,7 @@ class OrderedEnqueuer(SequenceEnqueuer): def pool_fn(seqs): return multiprocessing.Pool(workers, initializer=init_pool_generator, - initargs=(seqs, self.random_seed)) + initargs=(seqs, None)) return pool_fn def _wait_queue(self): -- GitLab From 3ea1267b9758fdc5582948805cdd852b09f21f6b Mon Sep 17 00:00:00 2001 From: dianlujitao Date: Wed, 7 Nov 2018 13:24:28 +0800 Subject: [PATCH 0157/1078] Install abseil headers to cmake shared library build * Since commit 5f004516 tensorflow::StringPiece is replaced by absl::string_view, so abseil headers should be installed with the shared library build to fix the compilation error for out-of-source builds. * To cleanly copy abseil headers, disable the in-source build for abseil so that the src tree is not polluted by cmake-generated files. * Meanwhile, remove the _build suffix from the abseil_cpp product name since it's confusing. 
--- .../contrib/cmake/external/abseil_cpp.cmake | 15 ++++++--------- tensorflow/contrib/cmake/tf_shared_lib.cmake | 4 ++++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index 4546dbdecc..b0fee24448 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -31,17 +31,17 @@ if (systemlib_ABSEIL_CPP) message(STATUS " abseil_cpp includes: ${ABSEIL_CPP_INCLUDE_DIR}") message(STATUS " abseil_cpp libraries: ${ABSEIL_CPP_LIBRARIES}") - add_custom_target(abseil_cpp_build) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) + add_custom_target(abseil_cpp) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp) else (systemlib_ABSEIL_CPP) include (ExternalProject) - set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) + set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp) set(abseil_cpp_URL https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz) set(abseil_cpp_HASH SHA256=84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e) - set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) + set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp-build) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") @@ -80,15 +80,12 @@ else (systemlib_ABSEIL_CPP) ${abseil_cpp_BUILD}/absl/types/libabsl_bad_optional_access.a) endif() - ExternalProject_Add(abseil_cpp_build + ExternalProject_Add(abseil_cpp PREFIX abseil_cpp URL ${abseil_cpp_URL} URL_HASH ${abseil_cpp_HASH} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" - BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES} - BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release - COMMAND ${CMAKE_COMMAND} --build . 
--config Release INSTALL_COMMAND "" CMAKE_CACHE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=${tensorflow_ENABLE_POSITION_INDEPENDENT_CODE} @@ -99,6 +96,6 @@ else (systemlib_ABSEIL_CPP) include_directories(${abseil_cpp_INCLUDE_DIR}) list(APPEND tensorflow_EXTERNAL_LIBRARIES ${abseil_cpp_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp) endif (systemlib_ABSEIL_CPP) diff --git a/tensorflow/contrib/cmake/tf_shared_lib.cmake b/tensorflow/contrib/cmake/tf_shared_lib.cmake index fdf522f1fd..c1bdc35fc6 100644 --- a/tensorflow/contrib/cmake/tf_shared_lib.cmake +++ b/tensorflow/contrib/cmake/tf_shared_lib.cmake @@ -145,6 +145,10 @@ install(DIRECTORY ${tensorflow_source_dir}/third_party/eigen3/ # unsupported Eigen directory install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/eigen/src/eigen/unsupported/Eigen/ DESTINATION include/unsupported/Eigen) +# absl directory +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/abseil_cpp/src/abseil_cpp/absl/ + DESTINATION include/absl + FILES_MATCHING PATTERN "*.h") # mkl if (tensorflow_ENABLE_MKL_SUPPORT) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/mkl/src/mkl/include/ -- GitLab From 81f02368365e096c27f37adc7c9af08905855ed1 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 7 Nov 2018 14:28:10 +0800 Subject: [PATCH 0158/1078] New algorithm applied to "CheckForNodeFusion()", to cover the pattern "A->B->C;A->C". 
--- tensorflow/core/graph/mkl_layout_pass.cc | 57 +++++++++++------------- 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 01f1aa9078..fa32a3e061 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -915,17 +915,17 @@ class MklLayoutRewritePass : public GraphOptimizationPass { if (node->type_string() != "Transpose") return false; // If "Transpose" has multiple output data edges, also don't fuse it. - if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; + // if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. // Note: this constraint will eventually be removed, if we enabled this fusion for training // in the future. - for (const Edge* e : node->out_edges()) { - if (e->IsControlEdge()) { - return false; - } - } + // for (const Edge* e : node->out_edges()) { + // if (e->IsControlEdge()) { + // return false; + // } + // } // If "Transpose" has input control edges, don't fuse on it. 
for (const Edge* e : node->in_edges()) { @@ -2736,12 +2736,13 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { // std::stack> work_stack; - std::unordered_set visited_nodes; + std::stack current_neighbor_stack; auto node_checker = fi->node_checkers.begin(); Node *current_node = nullptr; if (a != nullptr) { work_stack.push(a); + current_neighbor_stack.push(a->out_edges().begin()); } while (!work_stack.empty()) { @@ -2759,33 +2760,29 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { return make_tuple(true, nodes, *fi_ptr); } - bool all_succ_has_been_visited = true; - for (const Edge *e : current_node->out_edges()) { - if (!e->IsControlEdge()) { - Node *candidate_node = e->dst(); - - // If the candidate node has not been visited, push it to stack. - if (visited_nodes.find(candidate_node) == visited_nodes.end()) { - work_stack.push(candidate_node); - ++ node_checker; - all_succ_has_been_visited = false; - break; - } - } - } - - // All successor nodes of current node has been visited (no match found), - // pop the stack and mark current node as "visited". - if (all_succ_has_been_visited) { - visited_nodes.insert(current_node); - work_stack.pop(); - -- node_checker; + auto &current_neighbor_iter = current_neighbor_stack.top(); + if (current_neighbor_iter == current_node->out_edges().end()) { + // All output edges have been exhausted, pop the stack + // and roll back to the preceding node. + work_stack.pop(); + current_neighbor_stack.pop(); + -- node_checker; + } else { + // Found an edge that has not been visited, go through this edge + // and get the next neighbor. + Node *neighbor_node = (*current_neighbor_iter)->dst(); + work_stack.push(neighbor_node); + current_neighbor_stack.push(neighbor_node->out_edges().begin()); + ++ node_checker; + + // Increase current_neighbor_iter, which is at the top of stack. + ++ current_neighbor_iter; } - } else { // current node doesn't match, pop stack to roll back. 
- visited_nodes.insert(current_node); + // visited_nodes.insert(current_node); work_stack.pop(); + current_neighbor_stack.pop(); -- node_checker; } } -- GitLab From 117d30b9e313f93a39f17883e2e64960b4015c15 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Thu, 8 Nov 2018 22:33:42 +0800 Subject: [PATCH 0159/1078] use different layout for mkl and tf Change-Id: Id148c006fa74ca0382af8e67c6437f551fbba1b7 --- tensorflow/core/kernels/mkl_softmax_op.cc | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index c8b78f6187..ca78164ac9 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -62,7 +62,7 @@ class MklSoftmaxOp : public OpKernel { const int input_dims = src_tf_shape.dims(); auto src_dims = TFShapeToMklDnnDims(src_tf_shape); memory::dims output_dims; - if(src_mkl_shape.IsMklTensor()) { + if (src_mkl_shape.IsMklTensor()) { output_dims = src_mkl_shape.GetSizesAsMklDnnDims(); } else { @@ -75,6 +75,7 @@ class MklSoftmaxOp : public OpKernel { // Each of the simbols has the following meaning: // n = batch, c = channels, t = sequence lenght, h = height, // w = width, d = depth + switch (input_dims) { case 1: layout_type = memory::format::x; @@ -86,10 +87,20 @@ class MklSoftmaxOp : public OpKernel { layout_type = memory::format::tnc; break; case 4: - layout_type = memory::format::nhwc; + if (src_mkl_shape.IsMklTensor()) { + layout_type = memory::format::nhwc; + } + else { + layout_type = memory::format::nchw; + } break; case 5: - layout_type = memory::format::ndhwc; + if (src_mkl_shape.IsMklTensor()) { + layout_type = memory::format::ndhwc; + } + else { + layout_type = memory::format::ncdhw; + } break; default: OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1")); -- GitLab From c806b163c4d52cf80daecf2d63e3b76c7dc696e6 Mon Sep 17 00:00:00 2001 From: Nutti Date: Sat, 10 Nov 2018 
10:41:09 +0900 Subject: [PATCH 0160/1078] OptimizationPass::POST_REWRITE_FOR_EXEC after Grappler optimization in PartitionedCallOp --- tensorflow/core/kernels/partitioned_function_ops.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 71e506e5e6..72310f33ae 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -166,12 +166,6 @@ class PartitionedCallOp : public AsyncOpKernel { OptimizationPassRegistry::Global()->RunGrouping( OptimizationPassRegistry::POST_PLACEMENT, optimization_options), done); - OP_REQUIRES_OK_ASYNC( - ctx, - OptimizationPassRegistry::Global()->RunGrouping( - OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, - optimization_options), - done); Device* cpu_device; OP_REQUIRES_OK_ASYNC( @@ -184,6 +178,13 @@ class PartitionedCallOp : public AsyncOpKernel { device_set, cpu_device, &graph), done); + OP_REQUIRES_OK_ASYNC( + ctx, + OptimizationPassRegistry::Global()->RunGrouping( + OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, + optimization_options), + done); + std::unordered_map> subgraphs; OP_REQUIRES_OK_ASYNC( ctx, PartitionHelper(device_set, std::move(graph), &subgraphs), -- GitLab From 7d96d6fbd3cdbe215c9dce78f8227ef273b5d37a Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 10 Nov 2018 20:28:31 +0000 Subject: [PATCH 0161/1078] Fix ValueError by image.transform in eager mode This fix tries to address the issue raised in 23654 where in eager mode tf.contrib.image.transform will throw out ``` ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all() ``` This fix addresses the issue. This fix fixes 23654. 
Signed-off-by: Yong Tang --- tensorflow/contrib/image/python/ops/image_ops.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index d4fb99a017..b25a6f7b57 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.contrib.image.ops import gen_image_ops from tensorflow.contrib.util import loader from tensorflow.python.framework import common_shapes @@ -271,8 +272,11 @@ def transform(images, raise TypeError("Images should have rank between 2 and 4.") if output_shape is None: - output_shape = tensor_util.constant_value( - array_ops.shape(images)[1:3]) or array_ops.shape(images)[1:3] + output_shape = array_ops.shape(images)[1:3] + if not context.executing_eagerly(): + output_shape_value = tensor_util.constant_value(output_shape) + if output_shape_value is not None: + output_shape = output_shape_value output_shape = ops.convert_to_tensor( output_shape, dtypes.int32, name="output_shape") -- GitLab From 38455b2e111fa1acef3dd7dd00517b3fc1f1c38f Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 10 Nov 2018 20:30:22 +0000 Subject: [PATCH 0162/1078] Add test case for image.transform in eager mode Signed-off-by: Yong Tang --- .../contrib/image/python/kernel_tests/image_ops_test.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 4997c31a7f..ebf8a8adb3 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -281,6 +281,14 @@ class ImageOpsTest(test_util.TensorFlowTestCase): 
value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + @test_util.run_in_graph_and_eager_modes + def test_transform_eager(self): + image = constant_op.constant([[1., 2.], [3., 4.]]) + value = image_ops.transform(image, [1] * 8) + with self.test_session(use_gpu=True): + self.assertAllEqual( + self.evaluate(value), np.array([[4, 4], [4, 4]])) + class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From c50685de2d680d7e76e4b586e14138f33272a9cb Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 10 Nov 2018 20:33:28 +0000 Subject: [PATCH 0163/1078] Pylint fix Signed-off-by: Yong Tang --- tensorflow/contrib/image/python/kernel_tests/image_ops_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index ebf8a8adb3..ba5cdfebf9 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -286,8 +286,7 @@ class ImageOpsTest(test_util.TensorFlowTestCase): image = constant_op.constant([[1., 2.], [3., 4.]]) value = image_ops.transform(image, [1] * 8) with self.test_session(use_gpu=True): - self.assertAllEqual( - self.evaluate(value), np.array([[4, 4], [4, 4]])) + self.assertAllEqual(self.evaluate(value), np.array([[4, 4], [4, 4]])) class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From 902b080a85fc78816f0ca0c8b66d80411b372579 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Mon, 12 Nov 2018 00:48:03 +0800 Subject: [PATCH 0164/1078] fix layout error Change-Id: I24d66af494a9e96cfa13c885b3765f3f74dc2976 --- tensorflow/core/kernels/mkl_softmax_op.cc | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index ca78164ac9..6d644fba69 100644 --- 
a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -62,10 +62,13 @@ class MklSoftmaxOp : public OpKernel { const int input_dims = src_tf_shape.dims(); auto src_dims = TFShapeToMklDnnDims(src_tf_shape); memory::dims output_dims; + int axis; if (src_mkl_shape.IsMklTensor()) { + axis = 1; output_dims = src_mkl_shape.GetSizesAsMklDnnDims(); } else { + axis = input_dims - 1; output_dims = src_dims; //nhwc } memory::format layout_type; @@ -87,20 +90,10 @@ class MklSoftmaxOp : public OpKernel { layout_type = memory::format::tnc; break; case 4: - if (src_mkl_shape.IsMklTensor()) { - layout_type = memory::format::nhwc; - } - else { - layout_type = memory::format::nchw; - } + layout_type = memory::format::nchw; break; case 5: - if (src_mkl_shape.IsMklTensor()) { - layout_type = memory::format::ndhwc; - } - else { - layout_type = memory::format::ncdhw; - } + layout_type = memory::format::ncdhw; break; default: OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1")); @@ -127,15 +120,13 @@ class MklSoftmaxOp : public OpKernel { // data format is "nc" for src and dst; since the src and dst buffer is // always in 2D shape src.SetUsrMem(src_md, &src_tensor); - src.SetOpMemDesc(src_dims, layout_type); // creating a memory descriptor // passing outermost dim as default axis, where the softmax is applied // If axis is not the last dimension, python op will do a transpose so that we can // still perform softmax on its last dimension. - int axis = input_dims - 1; auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring, - src.GetOpMemDesc(), axis); + src.GetUsrMemDesc(), axis); auto softmax_fwd_pd = softmax_forward::primitive_desc(softmax_fwd_desc, cpu_engine); -- GitLab From ced3c110f02148bab77d06352ec0465fd87962c0 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Mon, 12 Nov 2018 10:30:55 +0800 Subject: [PATCH 0165/1078] Revert a change: accidently comment a few lines. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index fa32a3e061..ac3c817982 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -915,17 +915,17 @@ class MklLayoutRewritePass : public GraphOptimizationPass { if (node->type_string() != "Transpose") return false; // If "Transpose" has multiple output data edges, also don't fuse it. - // if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; + if (node->num_outputs() > 1 || node->out_edges().size() > 1) return false; // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. // Note: this constraint will eventually be removed, if we enabled this fusion for training // in the future. - // for (const Edge* e : node->out_edges()) { - // if (e->IsControlEdge()) { - // return false; - // } - // } + for (const Edge* e : node->out_edges()) { + if (e->IsControlEdge()) { + return false; + } + } // If "Transpose" has input control edges, don't fuse on it. for (const Edge* e : node->in_edges()) { -- GitLab From 17f1bdd9dda6f690df6fc298ed5c884e197a5d99 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Mon, 12 Nov 2018 13:08:50 +0800 Subject: [PATCH 0166/1078] Some modifications to "CheckForNodeFusion()": 1. check for match before push; 2. use "nodes" instead of "work_stack"; 3. 
get rid of "fi_ptr"; --- tensorflow/core/graph/mkl_layout_pass.cc | 70 +++++++++--------------- 1 file changed, 27 insertions(+), 43 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index ac3c817982..a5d2008a37 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2725,70 +2725,54 @@ Status MklLayoutRewritePass::FuseNode( std::tuple, const MklLayoutRewritePass::FusionInfo> MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { - const FusionInfo* fi_ptr = nullptr; + // Stores matched nodes, in the same order as node_checkers. + std::vector nodes; for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { - fi_ptr = &*fi; // // Make sure node "a" and its succeding nodes (b, c ...), match the pattern // defined in fusion info (ops[0], ops[1], ...), - // aka. "a->b->c" matches "op1->op2->op3" + // a.k.a. "a->b->c" matches "op1->op2->op3" // - std::stack> work_stack; + // Stores the first unvisted outgoing edge of each matched node in "nodes". std::stack current_neighbor_stack; - auto node_checker = fi->node_checkers.begin(); + nodes.clear(); - Node *current_node = nullptr; - if (a != nullptr) { - work_stack.push(a); + auto node_checker = fi->node_checkers.begin(); + if (a != nullptr && (*node_checker)(a)) { + nodes.push_back(a); current_neighbor_stack.push(a->out_edges().begin()); + ++ node_checker; } - while (!work_stack.empty()) { - current_node = work_stack.top(); + while (!nodes.empty()) { + auto& current_neighbor_iter = current_neighbor_stack.top(); - if ((*node_checker)(current_node)){ - if (node_checker == (fi->node_checkers.end() - 1)) { - // We find a match, break and return. - std::vector nodes; - while (!work_stack.empty()) { - nodes.insert(nodes.begin(), work_stack.top()); - work_stack.pop(); - } - - return make_tuple(true, nodes, *fi_ptr); - } + if (current_neighbor_iter != nodes.back()->out_edges().end()) { + // Found an unvisited edge. 
Goes through the edge to get the neighbor. + Node* neighbor_node = (*current_neighbor_iter)->dst(); + ++current_neighbor_stack.top(); // Retrieves the next unvisited edge. - auto &current_neighbor_iter = current_neighbor_stack.top(); - if (current_neighbor_iter == current_node->out_edges().end()) { - // All output edges have been exhausted, pop the stack - // and roll back to the preceding node. - work_stack.pop(); - current_neighbor_stack.pop(); - -- node_checker; - } else { - // Found a edge not been visited, go through this edge - // and get the next neighbor. - Node *neighbor_node = (*current_neighbor_iter)->dst(); - work_stack.push(neighbor_node); - current_neighbor_stack.push(neighbor_node->out_edges().begin()); - ++ node_checker; - - // Increase current_neighbor_iter, which is at the top of stack. - ++ current_neighbor_iter; + if ((*node_checker)(neighbor_node)) { + // Found a match. Stores the node and moves to the next checker. + nodes.push_back(neighbor_node); + current_neighbor_stack.push(neighbor_node->out_edges().begin()); + if (++node_checker == fi->node_checkers.end()) { + return make_tuple(true, nodes, *fi); + } } } else { - // current node doesn't match, pop stack to roll back. - // visited_nodes.insert(current_node); - work_stack.pop(); + // Removes the current node since none of its neighbor leads to a + // further match. 
+ nodes.pop_back(); current_neighbor_stack.pop(); - -- node_checker; + --node_checker; } } } - return make_tuple(false, std::vector(), *fi_ptr); + return make_tuple(false, std::vector(), FusionInfo()); } /////////////////////////////////////////////////////////////////////////////// -- GitLab From 9b90cbd5f4ff02160def3cffc34d56ceaa2da916 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Mon, 12 Nov 2018 13:18:09 +0800 Subject: [PATCH 0167/1078] Apply clang-format-3.8 --- tensorflow/core/graph/mkl_layout_pass.cc | 29 +++++++++++++----------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index a5d2008a37..d46e7165ff 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -22,12 +22,12 @@ limitations under the License. #include #include #include +#include +#include #include #include #include #include -#include -#include #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/optimization_registry.h" @@ -514,8 +514,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.conv2d_grad_filter_with_bias, GetConv2DBackpropFilterOrBiasAddGrad}); - // The fusion patterns in "finfo_" that show up first will get applied first, - // for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D to ABCD}, + // The fusion patterns in "finfo_" that show up first will get applied + // first, + // for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D + // to ABCD}, // since the first gets applied first, the final graph will be ABC->D. // @@ -903,7 +905,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { Status FuseNode(std::unique_ptr* g, std::vector& nodes, const MklLayoutRewritePass::FusionInfo fi); - // Fuse tranpose(to "NHWC") + mklop("NHWC") + transpose(to "NCHW") into mklop("NCHW"). 
+ // Fuse tranpose(to "NHWC") + mklop("NHWC") + transpose(to "NCHW") into + // mklop("NCHW"). // Here "mklop" can be any MKL-DNN supported op, such as Conv2D. static Status FuseTransposeMklOpTranspose( std::unique_ptr* g, std::vector& nodes, @@ -919,8 +922,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { // Check if has out control edge. If true, this is a training graph. // Currently we focus on inference and do no fusion in training. - // Note: this constraint will eventually be removed, if we enabled this fusion for training - // in the future. + // Note: this constraint will eventually be removed, if we enabled this + // fusion for training + // in the future. for (const Edge* e : node->out_edges()) { if (e->IsControlEdge()) { return false; @@ -1835,7 +1839,6 @@ void MklLayoutRewritePass::CopyAttrsConv(const Node* orig_node, NodeBuilder* nb, new_strides = {strides[NHWC::dim::N], strides[NHWC::dim::C], strides[NHWC::dim::H], strides[NHWC::dim::W]}; - new_dilations = {dilations[NHWC::dim::N], dilations[NHWC::dim::C], dilations[NHWC::dim::H], dilations[NHWC::dim::W]}; @@ -2698,8 +2701,8 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( for (const Edge* e : transpose_to_nchw->out_edges()) { if (!e->IsControlEdge()) { const int kTransposeWithMklOpOutputSlot = 0; - CHECK_NOTNULL((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, e->dst(), - e->dst_input())); + CHECK_NOTNULL((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, + e->dst(), e->dst_input())); } } @@ -2726,7 +2729,7 @@ Status MklLayoutRewritePass::FuseNode( std::tuple, const MklLayoutRewritePass::FusionInfo> MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { // Stores matched nodes, in the same order as node_checkers. 
- std::vector nodes; + std::vector nodes; for (auto fi = finfo_.begin(); fi != finfo_.end(); ++fi) { // @@ -2743,7 +2746,7 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { if (a != nullptr && (*node_checker)(a)) { nodes.push_back(a); current_neighbor_stack.push(a->out_edges().begin()); - ++ node_checker; + ++node_checker; } while (!nodes.empty()) { @@ -2772,7 +2775,7 @@ MklLayoutRewritePass::CheckForNodeFusion(Node* a) const { } } - return make_tuple(false, std::vector(), FusionInfo()); + return make_tuple(false, std::vector(), FusionInfo()); } /////////////////////////////////////////////////////////////////////////////// -- GitLab From 6a2f6f8f68ffc55aca43c85ffb0be1cee17d99c8 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Wed, 14 Nov 2018 08:34:30 +0800 Subject: [PATCH 0168/1078] abseil build in windows --- tensorflow/contrib/cmake/CMakeLists.txt | 2 +- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 9 ++++----- tensorflow/contrib/cmake/tf_python.cmake | 2 -- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 39fe8ff00e..d2988a4c7c 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -108,7 +108,7 @@ if (NOT WIN32) # Options for linking other libraries option(systemlib_ZLIB "Use the system installed library as shared objects instead of downloading ZLIB and statically linking to it: ZLIB" OFF) - option(systemlib_ABSEIL_CPP "Use the system installed library as shared objects instead of downloading ABSEIL_CPP and statically linking to it: ABSEIL_CPP" OFF) + option(systemlib_ABSEIL_CPP "Use the system installed library as shared objects instead of downloading ABSEIL_CPP and statically linking to it: ABSEIL_CPP" ON) option(systemlib_ALL "Turn on every possible systemlib_* options" OFF) if (systemlib_ALL) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake 
b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index 4546dbdecc..efa8d86d69 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -39,8 +39,8 @@ else (systemlib_ABSEIL_CPP) include (ExternalProject) set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) - set(abseil_cpp_URL https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz) - set(abseil_cpp_HASH SHA256=84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e) + set(abseil_cpp_URL https://github.com/abseil/abseil-cpp.git) + set(abseil_cpp_TAG master) set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) if(WIN32) @@ -82,8 +82,7 @@ else (systemlib_ABSEIL_CPP) ExternalProject_Add(abseil_cpp_build PREFIX abseil_cpp - URL ${abseil_cpp_URL} - URL_HASH ${abseil_cpp_HASH} + GIT_REPOSITORY ${abseil_cpp_URL} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES} @@ -101,4 +100,4 @@ else (systemlib_ABSEIL_CPP) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES abseil_cpp_build) -endif (systemlib_ABSEIL_CPP) +endif (systemlib_ABSEIL_CPP) \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 7647fe565d..4a9732d757 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -765,8 +765,6 @@ string(REPLACE "# BEGIN GENERATED FILES" "" api_init_files_text ${api_init_files string(REPLACE "# END GENERATED FILES" "" api_init_files_text ${api_init_files_text}) string(REPLACE "," ";" api_init_files_list ${api_init_files_text}) -message(STATUS ${api_init_files_text}) - set(api_init_files "") foreach(api_init_file ${api_init_files_list}) string(STRIP "${api_init_file}" api_init_file) -- GitLab From 669698caf6e886c27d4a9494760078ef3f4f1d40 Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Wed, 14 Nov 2018 
09:26:32 +0800 Subject: [PATCH 0169/1078] update comments Change-Id: Ie781dba3b07cee43bf1864ab5155a710d322aa19 --- tensorflow/core/kernels/mkl_softmax_op.cc | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index 6d644fba69..4e093cbf4b 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -69,7 +69,7 @@ class MklSoftmaxOp : public OpKernel { } else { axis = input_dims - 1; - output_dims = src_dims; //nhwc + output_dims = src_dims; } memory::format layout_type; // In MKL, data format passed to mkl softmax op depends on dimension of the input tensor. @@ -113,18 +113,11 @@ class MklSoftmaxOp : public OpKernel { ? src_mkl_shape.GetMklLayout() : memory::desc(src_dims, MklDnnType(), layout_type); - // src: setting memory descriptor and op memory descriptor - // Basically following two functions maps the TF "src_tensor" to mkl - // tensor object "src" + // src: setting memory descriptor // following functions are in mkl_util.h - // data format is "nc" for src and dst; since the src and dst buffer is - // always in 2D shape src.SetUsrMem(src_md, &src_tensor); // creating a memory descriptor - // passing outermost dim as default axis, where the softmax is applied - // If axis is not the last dimension, python op will do a transpose so that we can - // still perform softmax on its last dimension. 
auto softmax_fwd_desc = softmax_forward::desc(prop_kind::forward_scoring, src.GetUsrMemDesc(), axis); auto softmax_fwd_pd = -- GitLab From 21177b1a53f7ff3a8f964f4ecc73dc009ae9f2fe Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 14 Nov 2018 09:30:31 +0800 Subject: [PATCH 0170/1078] Format a wired comment made by clang-format-3.8 --- tensorflow/core/graph/mkl_layout_pass.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index d46e7165ff..dffd7ef4a4 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -515,10 +515,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { GetConv2DBackpropFilterOrBiasAddGrad}); // The fusion patterns in "finfo_" that show up first will get applied - // first, - // for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D - // to ABCD}, - // since the first gets applied first, the final graph will be ABC->D. + // first, for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D + // to ABCD}, since the first gets applied first, the final graph will be ABC->D. // // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D -- GitLab From 88b2369fe7c4451c63ff0599f7477897dabff2e0 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Wed, 14 Nov 2018 14:34:05 +0800 Subject: [PATCH 0171/1078] Break very long lines into 2. 
--- tensorflow/core/graph/mkl_layout_pass.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index dffd7ef4a4..bac434886f 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -515,8 +515,9 @@ class MklLayoutRewritePass : public GraphOptimizationPass { GetConv2DBackpropFilterOrBiasAddGrad}); // The fusion patterns in "finfo_" that show up first will get applied - // first, for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, A->B->C->D - // to ABCD}, since the first gets applied first, the final graph will be ABC->D. + // first, for example, graph "A->B->C-D" and finfo_ is {A->B->C to ABC, + // A->B->C->D to ABCD}, since the first gets applied first, the final + // graph will be ABC->D. // // Add rules to fuse sequences such as "Transpose (NCHW -> NHWC) + Conv2D -- GitLab From ada03a97ab77bdd58cd96d3a3fdca490278165d4 Mon Sep 17 00:00:00 2001 From: Abhinav Upadhyay Date: Wed, 14 Nov 2018 18:34:43 +0530 Subject: [PATCH 0172/1078] Fix a TypeError We cannot concatenate string and FailedPreConditionError --- tensorflow/contrib/tpu/python/tpu/keras_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/keras_support.py b/tensorflow/contrib/tpu/python/tpu/keras_support.py index 08f58a5f5b..ac72e3f55d 100644 --- a/tensorflow/contrib/tpu/python/tpu/keras_support.py +++ b/tensorflow/contrib/tpu/python/tpu/keras_support.py @@ -132,7 +132,7 @@ def _tpu_session_context(): An error occurred connecting or initializing your TPU. The session has been reset. re-run keras_to_tpu_model to create a new session. 
-""" + e) +""" + str(e)) def setup_tpu_session(cluster_resolver): -- GitLab From a3a8dfb385be626748bc31a1e10806af5d35dfd5 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Wed, 14 Nov 2018 22:32:17 +0800 Subject: [PATCH 0173/1078] nccl is removed from contrib --- tensorflow/contrib/cmake/CMakeLists.txt | 10 +++++++++- tensorflow/contrib/cmake/external/abseil_cpp.cmake | 6 +++--- tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake | 6 +++--- tensorflow/contrib/cmake/tf_python.cmake | 3 --- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index d2988a4c7c..3da938a8d0 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -108,7 +108,7 @@ if (NOT WIN32) # Options for linking other libraries option(systemlib_ZLIB "Use the system installed library as shared objects instead of downloading ZLIB and statically linking to it: ZLIB" OFF) - option(systemlib_ABSEIL_CPP "Use the system installed library as shared objects instead of downloading ABSEIL_CPP and statically linking to it: ABSEIL_CPP" ON) + option(systemlib_ABSEIL_CPP "Use the system installed library as shared objects instead of downloading ABSEIL_CPP and statically linking to it: ABSEIL_CPP" OFF) option(systemlib_ALL "Turn on every possible systemlib_* options" OFF) if (systemlib_ALL) @@ -293,6 +293,14 @@ else (systemlib_ZLIB) ${zlib_STATIC_LIBRARIES}) endif (systemlib_ZLIB) +if (systemlib_ABSEIL_CPP) + set(tensorflow_EXTERNAL_LIBRARIES ${tensorflow_EXTERNAL_LIBRARIES} + ${abseil_cpp_LIBRARIES}) +else (systemlib_ABSEIL_CPP) + set(tensorflow_EXTERNAL_LIBRARIES ${tensorflow_EXTERNAL_LIBRARIES} + ${abseil_cpp_STATIC_LIBRARIES}) +endif (systemlib_ABSEIL_CPP) + set(tensorflow_EXTERNAL_DEPENDENCIES zlib_copy_headers_to_destination gif_copy_headers_to_destination diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index 
efa8d86d69..a97704650a 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -47,10 +47,10 @@ else (systemlib_ABSEIL_CPP) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") set(abseil_cpp_STATIC_LIBRARIES ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_spinlock_wait.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_spinlock_wait.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_malloc_internal.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_throw_delegate.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_malloc_internal.lib + ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_throw_delegate.lib ${abseil_cpp_BUILD}/absl/numeric/Release/absl_int128.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib diff --git a/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake b/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake index d4f8bb1bec..944ae3997a 100644 --- a/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake +++ b/tensorflow/contrib/cmake/modules/FindAbseilCpp.cmake @@ -24,10 +24,10 @@ if(EXISTS "${ABSEIL_CPP_INCLUDE_DIR}" AND NOT "${ABSEIL_CPP_INCLUDE_DIR}" STREQU # search all libraries if no COMPONENTS was requested set(AbseilCpp_FIND_COMPONENTS "absl_algorithm;absl_any;absl_bad_any_cast" - "absl_bad_optional_access;absl_base absl_container;absl_debugging" + "absl_bad_optional_access;absl_base;absl_container;absl_debugging" "absl_dynamic_annotations;absl_examine_stack;absl_failure_signal_handler" - "absl_int128;absl_leak_check;absl_malloc_internal;absl_memory;absl_meta" - "absl_numeric;absl_optional;absl_span;absl_spinlock_wait;absl_stack_consumption" + "absl_int128;absl_leak_check;absl_internal_malloc_internal;absl_memory;absl_meta" + 
"absl_numeric;absl_optional;absl_span;absl_internal_spinlock_wait;absl_stack_consumption" "absl_stacktrace;absl_str_format;absl_strings;absl_symbolize;absl_synchronization" "absl_throw_delegate;absl_time;absl_utility;str_format_extension_internal" "str_format_internal;test_instance_tracker_lib") diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 4a9732d757..8faccf8d55 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -398,11 +398,8 @@ GENERATE_PYTHON_OP_LIB("contrib_layers_sparse_feature_cross_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/layers/ops/gen_sparse_feature_cross_op.py) GENERATE_PYTHON_OP_LIB("contrib_memory_stats_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/memory_stats/ops/gen_memory_stats_ops.py) -GENERATE_PYTHON_OP_LIB("contrib_nccl_ops" - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nccl/ops/gen_nccl_ops.py) GENERATE_PYTHON_OP_LIB("contrib_periodic_resample_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/periodic_resample/python/ops/gen_periodic_resample_op.py) - GENERATE_PYTHON_OP_LIB("contrib_nearest_neighbor_ops" DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/tf_python/tensorflow/contrib/nearest_neighbor/ops/gen_nearest_neighbor_ops.py) GENERATE_PYTHON_OP_LIB("contrib_resampler_ops" -- GitLab From fbd2c76afc0d884161845e256c7191dc915ec8c3 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Wed, 14 Nov 2018 23:01:27 +0800 Subject: [PATCH 0174/1078] remove change on pip setup --- tensorflow/tools/pip_package/setup.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index afcc7a8601..e15655ea44 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -45,10 +45,8 @@ DOCLINES = __doc__.split('\n') # This version 
string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. - _VERSION = '1.12.0-rc0' - REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', 'astor >= 0.6.0', @@ -60,8 +58,7 @@ REQUIRED_PACKAGES = [ 'protobuf >= 3.6.1', 'tensorboard >= 1.12.0, < 1.13.0', 'tensorflow_estimator >= 1.10.0', - 'termcolor >= 1.1.0', - 'absl-py >= 0.1.9' + 'termcolor >= 1.1.0' ] if sys.byteorder == 'little': -- GitLab From 7f82d6d20981307ce9c5e40c39d1f510d523e3bb Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Wed, 14 Nov 2018 23:09:31 +0800 Subject: [PATCH 0175/1078] remove change in non cmake folder --- tensorflow/core/util/cuda_device_functions.h | 14 -------------- tensorflow/core/util/cuda_launch_config.h | 4 ---- tensorflow/tools/pip_package/setup.py | 2 +- 3 files changed, 1 insertion(+), 19 deletions(-) diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index c05e410abc..2fc142a905 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -30,23 +30,9 @@ limitations under the License. #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#ifdef _WIN32 -#include "cuda.h" -#include "device_functions.h" -#else #include "cuda/include/cuda.h" -#include "cuda/include/device_functions.h" -#endif #include "tensorflow/core/platform/types.h" -#if CUDA_VERSION >= 7050 -#ifdef _WIN32 -#include "cuda_fp16.h" -#else -#include "cuda/include/cuda_fp16.h" -#endif -#endif // CUDA_VERSION >= 7050 - namespace tensorflow { namespace detail { diff --git a/tensorflow/core/util/cuda_launch_config.h b/tensorflow/core/util/cuda_launch_config.h index af7ea94ff4..080d4067ce 100644 --- a/tensorflow/core/util/cuda_launch_config.h +++ b/tensorflow/core/util/cuda_launch_config.h @@ -21,11 +21,7 @@ limitations under the License. 
#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#ifdef PLATFORM_GOOGLE #include "cuda/include/cuda.h" -#else -#include "cuda.h" -#endif #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e15655ea44..07475cc0c4 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -58,7 +58,7 @@ REQUIRED_PACKAGES = [ 'protobuf >= 3.6.1', 'tensorboard >= 1.12.0, < 1.13.0', 'tensorflow_estimator >= 1.10.0', - 'termcolor >= 1.1.0' + 'termcolor >= 1.1.0', ] if sys.byteorder == 'little': -- GitLab From 4d150cb81e103759d2f54f179ded573b8c7f07e0 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Wed, 14 Nov 2018 23:13:20 +0800 Subject: [PATCH 0176/1078] remove change in non cmake folder --- tensorflow/core/platform/default/logging.h | 6 ------ tensorflow/core/util/cuda_device_functions.h | 1 - 2 files changed, 7 deletions(-) diff --git a/tensorflow/core/platform/default/logging.h b/tensorflow/core/platform/default/logging.h index 483f095ee7..08a692fff7 100644 --- a/tensorflow/core/platform/default/logging.h +++ b/tensorflow/core/platform/default/logging.h @@ -187,12 +187,6 @@ string* MakeCheckOpString(const T1& v1, const T2& v2, const char* exprtext) { return comb.NewString(); } -// for MSVC build, the max and min function maybe defined in other macros -#ifdef _WIN32 -#undef max -#undef min -#endif - // Helper functions for CHECK_OP macro. 
// The (int, int) specialization works around the issue that the compiler // will not instantiate the template version of the function on values of diff --git a/tensorflow/core/util/cuda_device_functions.h b/tensorflow/core/util/cuda_device_functions.h index 2fc142a905..b91f8bb8ef 100644 --- a/tensorflow/core/util/cuda_device_functions.h +++ b/tensorflow/core/util/cuda_device_functions.h @@ -29,7 +29,6 @@ limitations under the License. #include #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - #include "cuda/include/cuda.h" #include "tensorflow/core/platform/types.h" -- GitLab From 6af7756a91da9a548224c4a836cce078edc77538 Mon Sep 17 00:00:00 2001 From: Jacky Ko Date: Thu, 15 Nov 2018 22:02:49 +0800 Subject: [PATCH 0177/1078] PR requested changes --- tensorflow/contrib/cmake/CMakeLists.txt | 2 +- .../contrib/cmake/external/abseil_cpp.cmake | 10 +++---- tensorflow/contrib/cmake/tf_c.cmake | 27 ------------------- 3 files changed, 5 insertions(+), 34 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 3da938a8d0..2ad9ae42a1 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -12,7 +12,7 @@ if(WIN32) endif() # Project -project(tensorflow VERSION 1.8.0 LANGUAGES C CXX) +project(tensorflow VERSION 1.12.0 LANGUAGES C CXX) # Set C++14 as standard for the whole project set(CMAKE_CXX_STANDARD 14) diff --git a/tensorflow/contrib/cmake/external/abseil_cpp.cmake b/tensorflow/contrib/cmake/external/abseil_cpp.cmake index a97704650a..8b76f37858 100644 --- a/tensorflow/contrib/cmake/external/abseil_cpp.cmake +++ b/tensorflow/contrib/cmake/external/abseil_cpp.cmake @@ -39,19 +39,16 @@ else (systemlib_ABSEIL_CPP) include (ExternalProject) set(abseil_cpp_INCLUDE_DIR ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) - set(abseil_cpp_URL https://github.com/abseil/abseil-cpp.git) - set(abseil_cpp_TAG master) + set(abseil_cpp_URL 
https://github.com/abseil/abseil-cpp/archive/e01d95528ea2137a4a27a88d1f57c6cb260aafed.tar.gz) + set(abseil_cpp_HASH SHA256=84043ed402d2a2a6ba4cdddb7e85118b1158fd81fe4ac3a14adc343d054c1e2e) set(abseil_cpp_BUILD ${CMAKE_BINARY_DIR}/abseil_cpp/src/abseil_cpp_build) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") set(abseil_cpp_STATIC_LIBRARIES ${abseil_cpp_BUILD}/absl/base/Release/absl_base.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_spinlock_wait.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_dynamic_annotations.lib ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_malloc_internal.lib - ${abseil_cpp_BUILD}/absl/base/Release/absl_internal_throw_delegate.lib - ${abseil_cpp_BUILD}/absl/numeric/Release/absl_int128.lib ${abseil_cpp_BUILD}/absl/strings/Release/absl_strings.lib ${abseil_cpp_BUILD}/absl/strings/Release/str_format_internal.lib ${abseil_cpp_BUILD}/absl/types/Release/absl_bad_optional_access.lib) @@ -82,7 +79,8 @@ else (systemlib_ABSEIL_CPP) ExternalProject_Add(abseil_cpp_build PREFIX abseil_cpp - GIT_REPOSITORY ${abseil_cpp_URL} + URL ${abseil_cpp_URL} + URL_HASH ${abseil_cpp_HASH} DOWNLOAD_DIR "${DOWNLOAD_LOCATION}" BUILD_IN_SOURCE 1 BUILD_BYPRODUCTS ${abseil_cpp_STATIC_LIBRARIES} diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 7de56147d7..a04142bd24 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -13,33 +13,6 @@ # limitations under the License. # ============================================================================== -# 1. Resolve the installed version of Python (for Python.h and python). -# TODO(mrry): Parameterize the build script to enable Python 3 building. 
-if(NOT PYTHON_INCLUDE_DIR) - set(PYTHON_NOT_FOUND false) - exec_program("${PYTHON_EXECUTABLE}" - ARGS "-c \"import distutils.sysconfig; print(distutils.sysconfig.get_python_inc())\"" - OUTPUT_VARIABLE PYTHON_INCLUDE_DIR - RETURN_VALUE PYTHON_NOT_FOUND) - if(${PYTHON_NOT_FOUND}) - message(FATAL_ERROR - "Cannot get Python include directory. Is distutils installed?") - endif(${PYTHON_NOT_FOUND}) -endif(NOT PYTHON_INCLUDE_DIR) - -# 2. Resolve the installed version of NumPy (for numpy/arrayobject.h). -if(NOT NUMPY_INCLUDE_DIR) - set(NUMPY_NOT_FOUND false) - exec_program("${PYTHON_EXECUTABLE}" - ARGS "-c \"import numpy; print(numpy.get_include())\"" - OUTPUT_VARIABLE NUMPY_INCLUDE_DIR - RETURN_VALUE NUMPY_NOT_FOUND) - if(${NUMPY_NOT_FOUND}) - message(FATAL_ERROR - "Cannot get NumPy include directory: Is NumPy installed?") - endif(${NUMPY_NOT_FOUND}) -endif(NOT NUMPY_INCLUDE_DIR) - ######################################################## # tf_c_framework library ######################################################## -- GitLab From 71ba0ec86e7cde759006f17979f71e863602182c Mon Sep 17 00:00:00 2001 From: Ouwen Huang Date: Thu, 15 Nov 2018 14:28:48 -0500 Subject: [PATCH 0178/1078] Update weight_decay_optimizers.py --- .../opt/python/training/weight_decay_optimizers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py index 1e8351b70f..8b8065c678 100644 --- a/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py +++ b/tensorflow/contrib/opt/python/training/weight_decay_optimizers.py @@ -64,10 +64,10 @@ class DecoupledWeightDecayExtension(object): the decay to the `weight_decay` as well. 
For example: ```python - decay = tf.train.piecewise_constant(tf.train.get_global_step(), - [10000, 15000], [1e-1, 1e-2, 1e-3]) - lr = 1*decay - wd = 1e-4*decay + schedule = tf.train.piecewise_constant(tf.train.get_global_step(), + [10000, 15000], [1e-0, 1e-1, 1e-2]) + lr = 1e-1 * schedule() + wd = lambda: 1e-4 * schedule() # ... -- GitLab From 489e181be77b83b2b631f48968aaf40897001838 Mon Sep 17 00:00:00 2001 From: Siju Date: Fri, 16 Nov 2018 11:56:50 +0530 Subject: [PATCH 0179/1078] Update graph_transformations.h --- .../lite/toco/graph_transformations/graph_transformations.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/lite/toco/graph_transformations/graph_transformations.h index 73a90c8239..187b584b69 100644 --- a/tensorflow/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/lite/toco/graph_transformations/graph_transformations.h @@ -139,7 +139,7 @@ DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator) DECLARE_GRAPH_TRANSFORMATION(MoveBinaryOperatorBeforeReshape) DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants) DECLARE_GRAPH_TRANSFORMATION(PropagateArrayDataTypes) -DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits); +DECLARE_GRAPH_TRANSFORMATION(PropagateFakeQuantNumBits) DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes) DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax) DECLARE_GRAPH_TRANSFORMATION(Quantize) -- GitLab From 27171a09e5812d3c8d237c69aa5d53250e7f1696 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 18 Nov 2018 01:28:50 +0000 Subject: [PATCH 0180/1078] Fix deprecated div While running test I noticed the following warning: ``` WARNING:tensorflow:From /usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/nn_ops.py:2744: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Deprecated in favor of operator or tf.math.divide. 
``` This fix fixes the deprecated warning. Signed-off-by: Yong Tang --- tensorflow/python/ops/nn_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 21008fc392..223a37c87e 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2741,7 +2741,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di noise_shape, seed=seed, dtype=x.dtype) # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) binary_tensor = math_ops.floor(random_tensor) - ret = math_ops.div(x, keep_prob) * binary_tensor + ret = math_ops.divide(x, keep_prob) * binary_tensor if not context.executing_eagerly(): ret.set_shape(x.get_shape()) return ret -- GitLab From 2427ff8fe9a24f4d9581716af46ef07f99408e0f Mon Sep 17 00:00:00 2001 From: "Meng, Peng" Date: Wed, 24 Oct 2018 15:49:46 +0800 Subject: [PATCH 0181/1078] fix layout error when src tensor is mkl Change-Id: I6bcfc8981867f1b60591c65fde77c92cff298694 --- tensorflow/core/kernels/mkl_softmax_op.cc | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/mkl_softmax_op.cc b/tensorflow/core/kernels/mkl_softmax_op.cc index 4e093cbf4b..25c0c7b078 100644 --- a/tensorflow/core/kernels/mkl_softmax_op.cc +++ b/tensorflow/core/kernels/mkl_softmax_op.cc @@ -90,10 +90,20 @@ class MklSoftmaxOp : public OpKernel { layout_type = memory::format::tnc; break; case 4: - layout_type = memory::format::nchw; + if (src_mkl_shape.IsMklTensor()) { + layout_type = memory::format::nhwc; + } + else { + layout_type = memory::format::nchw; + } break; case 5: - layout_type = memory::format::ncdhw; + if (src_mkl_shape.IsMklTensor()) { + layout_type = memory::format::ndhwc; + } + else { + layout_type = memory::format::ncdhw; + } break; default: OP_REQUIRES_OK(context, errors::Aborted("Input dims must be <= 5 and >=1")); -- GitLab From e320fba1e9349dee60ba1e06e1f6bbc08c2a85c1 Mon Sep 
17 00:00:00 2001 From: Yong Tang Date: Thu, 22 Nov 2018 20:47:59 +0000 Subject: [PATCH 0182/1078] Update re2 library to 2018-10-01 This fix updates re2 library to the latest release of 2018-10-01 Signed-off-by: Yong Tang --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 7ad094c507..065a695453 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -168,12 +168,12 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_googlesource_code_re2", - sha256 = "803c7811146edeef8f91064de37c6f19136ff01a2a8cdb3230e940b2fd9f07fe", - strip_prefix = "re2-2018-07-01", + sha256 = "a31397714a353587413d307337d0b58f8a2e20e2b9d02f2e24e3463fa4eeda81", + strip_prefix = "re2-2018-10-01", system_build_file = clean_dep("//third_party/systemlibs:re2.BUILD"), urls = [ - "https://mirror.bazel.build/github.com/google/re2/archive/2018-07-01.tar.gz", - "https://github.com/google/re2/archive/2018-07-01.tar.gz", + "https://mirror.bazel.build/github.com/google/re2/archive/2018-10-01.tar.gz", + "https://github.com/google/re2/archive/2018-10-01.tar.gz", ], ) -- GitLab From 437aeb55cc89fade6e386205b30148bc21471bb1 Mon Sep 17 00:00:00 2001 From: Castiel Date: Fri, 23 Nov 2018 07:31:57 +1030 Subject: [PATCH 0183/1078] Minor change in word2vec_basic tutorial --- tensorflow/examples/tutorials/word2vec/word2vec_basic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py index b09ee99768..bbcfc32098 100644 --- a/tensorflow/examples/tutorials/word2vec/word2vec_basic.py +++ b/tensorflow/examples/tutorials/word2vec/word2vec_basic.py @@ -91,7 +91,7 @@ vocabulary_size = 50000 def build_dataset(words, n_words): """Process raw inputs into a dataset.""" - count = [['UNK', -1]] + count = [('UNK', -1)] 
count.extend(collections.Counter(words).most_common(n_words - 1)) dictionary = dict() for word, _ in count: @@ -125,6 +125,7 @@ data_index = 0 # Step 3: Function to generate a training batch for the skip-gram model. def generate_batch(batch_size, num_skips, skip_window): + global data global data_index assert batch_size % num_skips == 0 assert num_skips <= 2 * skip_window -- GitLab From 48809b87793882266f01b7b40bc9e4a6e0f18f57 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Fri, 23 Nov 2018 11:44:19 -0800 Subject: [PATCH 0184/1078] Fixed merge errors and clang format issues --- tensorflow/core/graph/mkl_layout_pass.cc | 6 +- tensorflow/core/graph/mkl_layout_pass_test.cc | 47 +++-- tensorflow/core/kernels/mkl_conv_ops.cc | 180 +++++++++--------- tensorflow/core/kernels/mkl_conv_ops.h | 14 +- tensorflow/core/kernels/mkl_fused_ops_test.cc | 2 - tensorflow/core/ops/nn_ops.cc | 3 +- 6 files changed, 121 insertions(+), 131 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 8de0fc6083..de1a982b9d 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2289,13 +2289,13 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // We handle control edges now. for (const Edge* e : pred->in_edges()) { if (e->IsControlEdge()) { - //Don't allow duplicate edge + // Don't allow duplicate edge (*g)->AddControlEdge(e->src(), new_node, false); } } for (const Edge* e : succ->in_edges()) { if (e->IsControlEdge()) { - //Don't allow duplicate edge + // Don't allow duplicate edge (*g)->AddControlEdge(e->src(), new_node, false); } } @@ -2304,7 +2304,7 @@ Status MklLayoutRewritePass::MergePadWithConv2D(std::unique_ptr* g, // First, we will fix outgoing control edges from 'pred' node. 
for (const Edge* e : pred->out_edges()) { if (e->IsControlEdge()) { - //Don't allow duplicate edge + // Don't allow duplicate edge (*g)->AddControlEdge(new_node, e->dst(), false); } } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 0c8d7f7dbb..fa059f1194 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -138,13 +138,8 @@ REGISTER_OP("_MklInput2") .Output("o: uint8") .Output("o1: uint8") .SetIsStateful(); -REGISTER_OP("Output2") - .Input("i: float") - .Input("i1: float") - .SetIsStateful(); -REGISTER_OP("Output") - .Input("i: float") - .SetIsStateful(); +REGISTER_OP("Output2").Input("i: float").Input("i1: float").SetIsStateful(); +REGISTER_OP("Output").Input("i: float").SetIsStateful(); ///////////////////////////////////////////////////////////////////// // Unit tests related to node merge optiimization @@ -163,7 +158,6 @@ TEST_F(MklLayoutPassTest, Basic) { "A->C;A->D;B->C:1;B->D:1"); } - // Test set 1: Conv2D + AddBias // C=Conv2D(A,B); E=BiasAdd(C,D); Z=Zeta(E,Y) @@ -470,7 +464,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_Conv2DWithBias_ConvBpropInput_FilterFwd) { "E:3->G:4;F->G;F:control->DMT/_3:control;G->Z;X->Y:1;X->Z:1"); } -// Test set 3: Pad + Conv2D fusion +// Test set 3: Pad + Conv2D fusion // padding is VALID type // A = input(image), B = input(paddings), C= Pad = input of conv2D, // D=input(filter), E = Conv2D, Z = Zeta @@ -508,10 +502,10 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Positive) { } // Test if input control edges do not duplicate after merge. // If both the merging ops have input control edge from a common op -// then, the merged op will have only one control edge from that +// then, the merged op will have only one control edge from that // common op. 
// padding is VALID type -// A = input(image), A1 = input, B = input(paddings), +// A = input(image), A1 = input, B = input(paddings), // C= Pad = input of conv2D, // D=input(filter), E = Conv2D, Z = Zeta // C=Pad(A,B); E=Conv2D(C,D); Z=Zeta(E,Y) @@ -550,12 +544,14 @@ TEST_F(MklLayoutPassTest, Input_ControlEdge_PadWithConv2D_Positive) { const Edge* edge_1 = graph_.AddControlEdge(a1, e); ASSERT_NE(edge, nullptr); ASSERT_NE(edge_1, nullptr); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" - "A1:control->E:control;A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" - "DMT/_2->E:5;E->Z;Y->Z:1"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + "A1:control->E:control;A:control->DMT/_0:control;A:control->DMT/" + "_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;Y->Z:1"); } // Test if output control edges does not duplicate after merge. 
// If both the merging ops have output control edge to a common op, @@ -600,16 +596,17 @@ TEST_F(MklLayoutPassTest, Output_ControlEdge_PadWithConv2D_Positive) { const Edge* edge_1 = graph_.AddControlEdge(e, a1); ASSERT_NE(edge, nullptr); ASSERT_NE(edge_1, nullptr); - EXPECT_EQ(DoMklLayoutOptimizationPass(), - "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" - "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" - "A:control->DMT/_0:control;A:control->DMT/_1:control;" - "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" - "DMT/_2->E:5;E->Z;E:control->A1:control;Y->Z:1"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);A1(Input);B(Int32Input);D(Input);DMT/_0(Const);DMT/_1(Const);" + "DMT/_2(Const);E(_MklPadWithConv2D);Y(Input);Z(Zeta)|A->E;" + "A:control->DMT/_0:control;A:control->DMT/_1:control;" + "A:control->DMT/_2:control;B->E:2;D->E:1;DMT/_0->E:3;DMT/_1->E:4;" + "DMT/_2->E:5;E->Z;E:control->A1:control;Y->Z:1"); } // Pad + Conv2D fusion with padding is VALID, // Input node pointing to both Pad and Conv2D -// A = input(image), B = input(paddings), C= Pad +// A = input(image), B = input(paddings), C= Pad // E = Conv2D, Z = Zeta // C=Pad(A,B); E=Conv2D(C,A); Z=Zeta(E,Y) // After layout pass @@ -645,7 +642,7 @@ TEST_F(MklLayoutPassTest, NodeMerge_PadWithConv2D_Common_Input) { // Pad + Conv2D with padding is VALID, // Input node pointing to both Pad and Conv2D // Output of both Pad and Conv2D feeds one node (Z as Output2) -// A = input(as image), B = input(as paddings), C= Pad +// A = input(as image), B = input(as paddings), C= Pad // E = Conv2D, Z = Output2 // C=Pad(A,B); E=Conv2D(C,A); Z=Output(C,E) // After layout pass - No merging, since Pad and Conv2D both diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index cfc36d1495..9193d00592 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -465,19 +465,18 @@ class 
MklConvOp : public OpKernel { filter.shape().DebugString())); for (int i = 0; i < 3; i++) { - OP_REQUIRES( - context, - FastBoundsCheck(filter.dim_size(i), std::numeric_limits::max()), - errors::InvalidArgument("filter too large")); + OP_REQUIRES(context, FastBoundsCheck(filter.dim_size(i), + std::numeric_limits::max()), + errors::InvalidArgument("filter too large")); } const int64 input_depth = input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'C') : GetTensorDim(input, data_format_, 'C'); - OP_REQUIRES(context, input_depth == filter.dim_size(2), - errors::InvalidArgument( - "input and filter must have the same depth: ", input_depth, - " vs ", filter.dim_size(2))); + OP_REQUIRES( + context, input_depth == filter.dim_size(2), + errors::InvalidArgument("input and filter must have the same depth: ", + input_depth, " vs ", filter.dim_size(2))); // The last dimension for filter is out_depth. const int out_depth = static_cast(filter.dim_size(3)); @@ -486,10 +485,9 @@ class MklConvOp : public OpKernel { const int64 input_rows_raw = input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'H') : GetTensorDim(input, data_format_, 'H'); - OP_REQUIRES( - context, - FastBoundsCheck(input_rows_raw, std::numeric_limits::max()), - errors::InvalidArgument("Input rows too large")); + OP_REQUIRES(context, FastBoundsCheck(input_rows_raw, + std::numeric_limits::max()), + errors::InvalidArgument("Input rows too large")); const int input_rows = static_cast(input_rows_raw); const int filter_rows = static_cast(filter.dim_size(0)); @@ -498,10 +496,9 @@ class MklConvOp : public OpKernel { const int64 input_cols_raw = input_in_mkl_format ? 
GetMklTensorDim(mkl_context.input_shape, 'W') : GetTensorDim(input, data_format_, 'W'); - OP_REQUIRES( - context, - FastBoundsCheck(input_cols_raw, std::numeric_limits::max()), - errors::InvalidArgument("Input cols too large")); + OP_REQUIRES(context, FastBoundsCheck(input_cols_raw, + std::numeric_limits::max()), + errors::InvalidArgument("Input cols too large")); const int input_cols = static_cast(input_cols_raw); const int filter_cols = static_cast(filter.dim_size(1)); @@ -509,10 +506,9 @@ class MklConvOp : public OpKernel { const int64 input_batch_raw = input_in_mkl_format ? GetMklTensorDim(mkl_context.input_shape, 'N') : GetTensorDim(input, data_format_, 'N'); - OP_REQUIRES( - context, - FastBoundsCheck(input_batch_raw, std::numeric_limits::max()), - errors::InvalidArgument("batch is too large")); + OP_REQUIRES(context, FastBoundsCheck(input_batch_raw, + std::numeric_limits::max()), + errors::InvalidArgument("batch is too large")); const int batch = static_cast(input_batch_raw); // For now we take the stride from the second and third dimensions only (we @@ -850,8 +846,8 @@ REGISTER_KERNEL_BUILDER(Name("_MklConv2DWithBias") // Base class for convolution forward operations template + typename Toutput, typename Ttemp_output, typename Tpadding, + bool biasEnabled, bool padEnabled> class MklConvOp : public OpKernel { public: ~MklConvOp() {} @@ -894,17 +890,15 @@ class MklConvOp : public OpKernel { OP_REQUIRES(context, dilations_.size() == 5, errors::InvalidArgument("Dilation rates field must " "specify 5 dimensions")); - OP_REQUIRES(context, - (GetTensorDim(dilations_, data_format_, 'N') == 1 && - GetTensorDim(dilations_, data_format_, 'C') == 1), + OP_REQUIRES(context, (GetTensorDim(dilations_, data_format_, 'N') == 1 && + GetTensorDim(dilations_, data_format_, 'C') == 1), errors::InvalidArgument( "Current implementation does not yet support " "dilations rates in the batch and depth dimensions.")); OP_REQUIRES( - context, - (GetTensorDim(dilations_, data_format_, 
'0') > 0 && - GetTensorDim(dilations_, data_format_, '1') > 0 && - GetTensorDim(dilations_, data_format_, '2') > 0), + context, (GetTensorDim(dilations_, data_format_, '0') > 0 && + GetTensorDim(dilations_, data_format_, '1') > 0 && + GetTensorDim(dilations_, data_format_, '2') > 0), errors::InvalidArgument("Dilated rates should be larger than 0.")); } } @@ -940,9 +934,9 @@ class MklConvOp : public OpKernel { auto src_tf_shape = GetTfShape(context, kInputIndex_Src); auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter); conv_utl.GetConvFwdSizesInMklOrder( - src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, - &strides, &dilations, &dst_dims_tf_order, &dst_dims_mkl_order, - &padding_left, &padding_right, padEnabled); + src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, + &dilations, &dst_dims_tf_order, &dst_dims_mkl_order, &padding_left, + &padding_right, padEnabled); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -974,9 +968,10 @@ class MklConvOp : public OpKernel { bool isConv2D = (strides_.size() == 4); // TODO(Intel-tf) Add check to make sure padEnabled is true only for 2D - if(!isConv2D){ - OP_REQUIRES(context, padEnabled, - errors::InvalidArgument("Pad+Conv fusion only works for 2D")); + if (!isConv2D) { + OP_REQUIRES( + context, !padEnabled, + errors::InvalidArgument("Pad+Conv fusion only works for 2D")); } // Create memory for user data. // Describe how the inputs and outputs of Convolution look like. Also @@ -1211,7 +1206,6 @@ class MklConvOp : public OpKernel { const int kInputIndex_Pad = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; const int kDilationH = 0, kDilationW = 1; - // Allocate filter output tensor. 
void AllocateFilterOutputTensor( @@ -1282,7 +1276,7 @@ template class MklQuantizedConv2DOp : public MklConvOp { + int32, biasEnabled, false> { public: virtual ~MklQuantizedConv2DOp() { if (this->input_bias_ != nullptr) { @@ -1297,13 +1291,13 @@ class MklQuantizedConv2DOp } explicit MklQuantizedConv2DOp(OpKernelConstruction* context) - : MklConvOp(context) {} + : MklConvOp(context) {} void Compute(OpKernelContext* context) override { // Compute int32 output tensor - MklConvOp::Compute(context); + MklConvOp::Compute(context); // Compute additional outputs: min/max scalars. int bias_index_offset; @@ -1349,8 +1343,8 @@ class MklQuantizedConv2DOp protected: void ExtendConvFwdParams(OpKernelContext* context, MklConvFwdParams& params) override { - MklConvOp::ExtendConvFwdParams(context, params); + MklConvOp::ExtendConvFwdParams(context, params); // When the output type is quint8, the output data id requantized // into quint8. A post_op "output_scale" is added to do the conversion. @@ -1561,11 +1555,11 @@ class MklQuantizedConv2DSumReluOp } } // TODO(mdfaijul): Add cleaner code for non-mkl tensor - MklConvOp::AllocateOutputTensor(context, conv_prim_desc, - output_dims_mkl_order, - output_tf_format, - output_tensor); + MklConvOp::AllocateOutputTensor(context, conv_prim_desc, + output_dims_mkl_order, + output_tf_format, + output_tensor); const Tensor& summand = MklGetInput(context, summand_idx); if (summand.dtype() != DT_FLOAT) TF_CHECK_OK(Status(error::Code::FAILED_PRECONDITION, @@ -1583,8 +1577,8 @@ class MklQuantizedConv2DSumReluOp const float max_filter = context->input(5 + bias_index_offset).flat()(0); - reorder_sum_scale = 255.0 * 127.0 / - (std::max(std::abs(max_input), std::abs(min_input)) * + reorder_sum_scale = + 255.0 * 127.0 / (std::max(std::abs(max_input), std::abs(min_input)) * std::max(std::abs(max_filter), std::abs(min_filter))); std::vector scales; scales.push_back(reorder_sum_scale); @@ -1833,52 +1827,56 @@ REGISTER_KERNEL_BUILDER( 
MklQuantizedConv2DSumReluOp); #endif // INTEL_MKL_ML - // Register 2D operations -#define REGISTER_MKL_CPU_2D(T) \ - REGISTER_KERNEL_BUILDER(Name("_MklConv2D") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklConvOp); \ - REGISTER_KERNEL_BUILDER(Name("_MklConv2DWithBias") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklConvOp); \ - REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklDummyOp); \ - REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklConvOp); \ - REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklConvOp); \ - REGISTER_KERNEL_BUILDER(Name("__MklDummyPadWithConv2D") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .TypeConstraint("Tpaddings") \ - .Label(mkl_op_registry::kMklOpLabel), \ +#define REGISTER_MKL_CPU_2D(T) \ + REGISTER_KERNEL_BUILDER(Name("_MklConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConvOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklConv2DWithBias") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConvOp); \ + REGISTER_KERNEL_BUILDER(Name("__MklDummyConv2DWithBias") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklDummyOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConvOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + 
.TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConvOp); \ + REGISTER_KERNEL_BUILDER(Name("__MklDummyPadWithConv2D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .TypeConstraint("Tpaddings") \ + .Label(mkl_op_registry::kMklOpLabel), \ MklDummyOp); -TF_CALL_float(REGISTER_MKL_CPU); +TF_CALL_float(REGISTER_MKL_CPU_2D); // Register 3D operations -#define REGISTER_MKL_CPU_3D(T) \ - REGISTER_KERNEL_BUILDER(Name("_MklConv3D") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .Label(mkl_op_registry::kMklOpLabel), \ - MklConvOp); +#define REGISTER_MKL_CPU_3D(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("_MklConv3D") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklConvOp); TF_CALL_float(REGISTER_MKL_CPU_3D); } // namespace tensorflow diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 8c71c20879..963826a73a 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_CORE_KERNELS_MKL_CONV_OPS_H_ #include -#include #include +#include #include "mkldnn.hpp" #include "tensorflow/core/framework/numeric_op.h" @@ -85,7 +85,7 @@ class MklDnnConvUtil { } // Calculate Convolution dilations - virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { + virtual inline void GetDilationsInMklOrder(memory::dims* dilations) { // For now we take the dilation from the second and third dimensions only // (we do not support dilation on the batch or depth dimension). CHECK_NOTNULL(dilations); @@ -195,9 +195,8 @@ class MklDnnConvUtil { filter_shape.DebugString())); for (int i = 0; i < ((strides_.size() == 4) ? 
3 : 5); i++) { - OP_REQUIRES(context_, - FastBoundsCheck(filter_shape.dim_size(i), - std::numeric_limits::max()), + OP_REQUIRES(context_, FastBoundsCheck(filter_shape.dim_size(i), + std::numeric_limits::max()), errors::InvalidArgument("filter too large")); } @@ -463,8 +462,8 @@ class MklDnnConvUtil { input_tf_shape.DebugString())); } - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, - strides, dilations, output_dims_tf_order, + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, + dilations, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); } @@ -556,7 +555,6 @@ class MklConvBackpropCommonOp : public OpKernel { TensorFormat data_format_; // NCHW or NHWC }; - ///////////////////////////////////////////////////////////////////// /// Dummy Mkl op that is just used for operators that are intermediate /// output of node fusion in the graph diff --git a/tensorflow/core/kernels/mkl_fused_ops_test.cc b/tensorflow/core/kernels/mkl_fused_ops_test.cc index 900325ac91..991fb08093 100644 --- a/tensorflow/core/kernels/mkl_fused_ops_test.cc +++ b/tensorflow/core/kernels/mkl_fused_ops_test.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ #ifdef INTEL_MKL -#ifndef INTEL_MKL_ML_ONLY // We don't support fusion in MKL ML #include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" @@ -160,5 +159,4 @@ TEST_F(FusedPadConvOpTest, PaddingConvTestNchw) { Run(DT_FLOAT, image, filter, padding, expected, "NCHW"); } } // namespace tensorflow -#endif // INTEL_MKL_ML_ONLY #endif // INTEL_MKL diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 8afbe0333a..0b99542c5c 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1649,7 +1649,7 @@ REGISTER_OP("_MklPadWithConv2D") .Attr(GetConvnetDataFormatAttrString()) .Attr("dilations: list(int) = [1, 1, 1, 1]") .Attr("Tpaddings: {int32, int64} = DT_INT32") - .SetShapeFn(shape_inference::Conv2DShape) + .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Pad and Conv2D operator. Uses MKL DNN APIs to perform Pad and 2D convolution to the output of convolution. @@ -2159,7 +2159,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. 
)doc"); - REGISTER_OP("_MklAvgPool3DGrad") .Input("orig_input_shape: int32") .Input("grad: T") -- GitLab From 768b36822e0d5b988a697e0c9e3b65302b051630 Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 26 Nov 2018 15:45:08 -0800 Subject: [PATCH 0185/1078] fixing the buildifer error of tensorflow/core/kernels/BUILD --- tensorflow/core/kernels/BUILD | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 1759a7f790..efa571b23b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -29,26 +29,26 @@ package_group( load( "//tensorflow:tensorflow.bzl", + "cc_header_only_library", "if_android", + "if_not_windows", + "tf_cc_binary", "tf_cc_test", "tf_cc_test_mkl", "tf_cc_tests", - "tf_cc_binary", "tf_copts", "tf_cuda_library", - "tf_opts_nortti_if_android", "tf_kernel_library", "tf_mkl_kernel_library", - "cc_header_only_library", - "if_not_windows", + "tf_opts_nortti_if_android", ) load("@local_config_sycl//sycl:build_defs.bzl", "if_sycl") load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_tests") load( "//tensorflow/core:platform/default/build_config.bzl", - "tf_proto_library", "tf_kernel_tests_linkstatic", + "tf_proto_library", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -6714,10 +6714,10 @@ tf_cc_test_mkl( srcs = ["mkl_fused_ops_test.cc"], linkstatic = 1, deps = [ - ":mkl_conv_op", - ":mkl_tfconv_op", ":conv_ops", ":image", + ":mkl_conv_op", + ":mkl_tfconv_op", ":ops_testutil", ":ops_util", "//tensorflow/cc:cc_ops", @@ -6730,8 +6730,9 @@ tf_cc_test_mkl( "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", - ] + ], ) + tf_mkl_kernel_library( name = "mkl_transpose_op", srcs = [ -- GitLab From 453335db8382418dc84593d044f63a995adc025a Mon Sep 17 00:00:00 2001 From: mbhuiyan Date: Mon, 26 Nov 2018 16:57:34 -0800 Subject: [PATCH 0186/1078] 
applying proper coding style --- tensorflow/core/kernels/mkl_lrn_op.cc | 42 ++++++++++++--------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 4d46abb0a4..407ce5d653 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -23,7 +23,6 @@ limitations under the License. #define EIGEN_USE_THREADS #include #include "mkldnn.hpp" -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" @@ -32,6 +31,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/mkl_util.h" #include "tensorflow/core/util/tensor_format.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if !defined(IS_MOBILE_PLATFORM) #include "tensorflow/core/util/work_sharder.h" @@ -71,11 +71,10 @@ class MklLRNOp : public OpKernel { explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) { int64 depth_radius64; OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES( - context, - FastBoundsCheck(depth_radius64, std::numeric_limits::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); + OP_REQUIRES(context, FastBoundsCheck(depth_radius64, + std::numeric_limits::max()), + errors::InvalidArgument("depth_radius = ", depth_radius64, + " larger than int max")); depth_radius_ = static_cast(depth_radius64); OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); @@ -161,9 +160,9 @@ class MklLRNOp : public OpKernel { PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data, &dst_dnn_data, &workspace_dnn_data); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) 
+ ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); @@ -292,16 +291,14 @@ class MklLRNOp : public OpKernel { if (src_dnn_shape.IsMklTensor()) { OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4, errors::InvalidArgument("input must be 4-dimensional")); - OP_REQUIRES(context, - FastBoundsCheck(src_tensor.NumElements(), - std::numeric_limits::max()), + OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(), + std::numeric_limits::max()), errors::InvalidArgument("argument to LRN too large")); } else { OP_REQUIRES(context, src_tensor.dims() == 4, errors::InvalidArgument("input must be 4-dimensional")); - OP_REQUIRES(context, - FastBoundsCheck(src_tensor.NumElements(), - std::numeric_limits::max()), + OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(), + std::numeric_limits::max()), errors::InvalidArgument("argument to LRN too large")); } } @@ -321,11 +318,10 @@ class MklLRNGradOp : public OpKernel { explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) { int64 depth_radius64; OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64)); - OP_REQUIRES( - context, - FastBoundsCheck(depth_radius64, std::numeric_limits::max()), - errors::InvalidArgument("depth_radius = ", depth_radius64, - " larger than int max")); + OP_REQUIRES(context, FastBoundsCheck(depth_radius64, + std::numeric_limits::max()), + errors::InvalidArgument("depth_radius = ", depth_radius64, + " larger than int max")); depth_radius_ = static_cast(depth_radius64); OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_)); OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); @@ -432,9 +428,9 @@ class MklLRNGradOp : public OpKernel { memory::primitive_desc(target_diff_dst_md, cpu_engine), &workspace_dnn_data); } 
catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); -- GitLab From b607192183e06ef18f79e7c6dd8812c043fc1ab3 Mon Sep 17 00:00:00 2001 From: Serge Panev Date: Tue, 27 Nov 2018 10:57:57 +0100 Subject: [PATCH 0187/1078] Fix comparison between signed and unsigned integer expressions in tensor_format.h --- tensorflow/core/framework/dataset.h | 2 +- tensorflow/core/util/tensor_format.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 9b11449b30..e0041492ee 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -162,7 +162,7 @@ class GraphDefBuilderWrapper { const std::vector>& attrs, Node** output) { std::vector> enumerated_inputs(inputs.size()); - for (int i = 0; i < inputs.size(); i++) { + for (int i = 0; static_cast(i) < inputs.size(); i++) { enumerated_inputs[i] = std::make_pair(i, inputs[i]); } return AddDataset(dataset, enumerated_inputs, {}, attrs, output); diff --git a/tensorflow/core/util/tensor_format.h b/tensorflow/core/util/tensor_format.h index b0c349dd90..68674cb282 100644 --- a/tensorflow/core/util/tensor_format.h +++ b/tensorflow/core/util/tensor_format.h @@ -498,7 +498,7 @@ inline TensorShape ShapeFromFormat(TensorFormat format, int64 N, dim_sizes[GetTensorBatchDimIndex(dims, format)] = N; for (int dim = 0; static_cast(dim) < spatial.size(); dim++) { auto dim_size = spatial[dim]; - if (format == FORMAT_NHWC_VECT_W && dim == spatial.size() - 1) { + if (format == FORMAT_NHWC_VECT_W && static_cast(dim) == spatial.size() - 1) { 
CHECK_EQ(0, dim_size % 4) << "FORMAT_NHWC_VECT_W requires W to be a multiple of 4, but W=" << dim_size; -- GitLab From 6e436c4f058175c62dca5aa37b9b95c15c251ea9 Mon Sep 17 00:00:00 2001 From: manhyuk Date: Tue, 27 Nov 2018 19:08:47 +0900 Subject: [PATCH 0188/1078] fix typo --- tensorflow/compiler/xla/service/algebraic_simplifier_test.cc | 2 +- tensorflow/compiler/xla/shape_util.cc | 2 +- tensorflow/python/ops/ragged/ragged_tensor.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 24c35464ad..054d518671 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -1617,7 +1617,7 @@ TEST_F(AlgebraicSimplifierTest, ReshapeOfTransposeOfRngToRng) { (AlgebraicSimplifierOptions(bitcasting_callback()))); EXPECT_TRUE(simplifier.Run(m.get()).ValueOrDie()); - // Verify that that reshape(transpose(rng)) is replace by a single rng of the + // Verify that reshape(transpose(rng)) is replace by a single rng of the // same shape as the reshape. EXPECT_THAT(computation->root_instruction(), op::Rng()); EXPECT_TRUE(ShapeUtil::Equal(computation->root_instruction()->shape(), diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index b05ec209cc..c185b19687 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1165,7 +1165,7 @@ Status ForEachMutableSubshapeHelper( // Let the argument `permutation` be P. This is a permutation over `shape`'s // dimensions, so our return value will be a shape with dims P.I = P. 
Our // goal is to construct a layout permutation L* that we can apply to P such - // that that the physical dimension ordering of the returned shape is the same + // that the physical dimension ordering of the returned shape is the same // as that of the original shape, namely L'. // // Our returned shape has dims P and layout L*, so its in-memory layout is diff --git a/tensorflow/python/ops/ragged/ragged_tensor.py b/tensorflow/python/ops/ragged/ragged_tensor.py index abb27fc3c0..1d4bbd592d 100644 --- a/tensorflow/python/ops/ragged/ragged_tensor.py +++ b/tensorflow/python/ops/ragged/ragged_tensor.py @@ -64,7 +64,7 @@ class RaggedTensor(object): a 3-D `RaggedTensor` that stores the fixed-size word embedding for each word in a sentence, for each sentence in a batch, could be written as `[num_sentences, (num_words), embedding_size]`. The parentheses around - `(num_words)` indicate that that dimension is ragged, and that the length + `(num_words)` indicate that dimension is ragged, and that the length of each element list in that dimension may vary for each item. 
### Component Tensors -- GitLab From 4b3a92f1bf905a58daa878999a9e5891d9b9e931 Mon Sep 17 00:00:00 2001 From: Serge Panev Date: Tue, 27 Nov 2018 22:18:58 +0100 Subject: [PATCH 0189/1078] Simpler loop --- tensorflow/core/framework/dataset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index e0041492ee..927cf22c68 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -162,7 +162,7 @@ class GraphDefBuilderWrapper { const std::vector>& attrs, Node** output) { std::vector> enumerated_inputs(inputs.size()); - for (int i = 0; static_cast(i) < inputs.size(); i++) { + for (size_t i = 0; i < inputs.size(); i++) { enumerated_inputs[i] = std::make_pair(i, inputs[i]); } return AddDataset(dataset, enumerated_inputs, {}, attrs, output); -- GitLab From 59ebe545b3385c4c36d3b1602671d109e44ea38c Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Fri, 16 Nov 2018 16:20:05 -0800 Subject: [PATCH 0190/1078] Add CancellationManager to LoopCondOp --- tensorflow/core/kernels/control_flow_ops.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index 1587eb5114..21aabd9295 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -600,6 +600,15 @@ LoopCondOp::LoopCondOp(OpKernelConstruction* context) : OpKernel(context) {} LoopCondOp::~LoopCondOp() = default; void LoopCondOp::Compute(OpKernelContext* context) { + CancellationManager* cm = context->cancellation_manager(); + bool already_cancelled = cm->IsCancelled(); + + if (already_cancelled) { + Tensor continue_running(false); + context->set_output(0, continue_running); + return; + } + context->set_output(0, context->input(0)); } -- GitLab From c0b128c45396560f26769a293525c60f76850a3f Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Fri, 16 Nov 2018 16:20:52 -0800 Subject: 
[PATCH 0191/1078] Add a python test for while_loop timeout --- .../python/kernel_tests/control_flow_ops_py_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 37654abd18..595d4ff37b 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1988,6 +1988,16 @@ class ControlFlowTest(test.TestCase): for i in xrange(10): self.assertEqual([i], q.dequeue().eval()) + def testWhileTimeOut(self): + run_options = config_pb2.RunOptions(timeout_in_ms=1) + with self.cached_session() as sess: + n = constant_op.constant(0) + c = lambda x: True + b = lambda x: math_ops.add(x, 1) + r = control_flow_ops.while_loop(c, b, [n]) + with self.assertRaises(errors_impl.DeadlineExceededError): + sess.run(r, options=run_options) + @test_util.disable_control_flow_v2("b/117119329 (stack)") def testWhileStack_1(self): with self.cached_session(): -- GitLab From fc5392dedec126f988788a597edb55021fb07b60 Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Tue, 20 Nov 2018 22:25:51 -0500 Subject: [PATCH 0192/1078] Raise an error when loop execution is cancelled --- tensorflow/core/kernels/control_flow_ops.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index 21aabd9295..61547adb73 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -602,12 +602,8 @@ LoopCondOp::~LoopCondOp() = default; void LoopCondOp::Compute(OpKernelContext* context) { CancellationManager* cm = context->cancellation_manager(); bool already_cancelled = cm->IsCancelled(); - - if (already_cancelled) { - Tensor continue_running(false); - context->set_output(0, continue_running); - return; - } + OP_REQUIRES(context, !already_cancelled, + 
errors::Cancelled("Loop execution was cancelled.")); context->set_output(0, context->input(0)); } -- GitLab From 5bb0553a1b3bd580bd1502ab6b339ca3a1f0b5df Mon Sep 17 00:00:00 2001 From: Fei Hu Date: Tue, 27 Nov 2018 22:05:43 -0800 Subject: [PATCH 0193/1078] Handle the case that CancellationManager is null in eager mode --- tensorflow/core/kernels/control_flow_ops.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/control_flow_ops.cc b/tensorflow/core/kernels/control_flow_ops.cc index 61547adb73..b3bdff2575 100644 --- a/tensorflow/core/kernels/control_flow_ops.cc +++ b/tensorflow/core/kernels/control_flow_ops.cc @@ -601,9 +601,11 @@ LoopCondOp::~LoopCondOp() = default; void LoopCondOp::Compute(OpKernelContext* context) { CancellationManager* cm = context->cancellation_manager(); - bool already_cancelled = cm->IsCancelled(); - OP_REQUIRES(context, !already_cancelled, - errors::Cancelled("Loop execution was cancelled.")); + if (cm != nullptr) { + bool already_cancelled = cm->IsCancelled(); + OP_REQUIRES(context, !already_cancelled, + errors::Cancelled("Loop execution was cancelled.")); + } context->set_output(0, context->input(0)); } -- GitLab From ce619f2697afd683813264ae2d068a1038acab77 Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Wed, 28 Nov 2018 08:43:33 -0700 Subject: [PATCH 0194/1078] [Intel MKL] Updating README.md with new links to Intel(R) Optimized Tensorflow --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8af5370bef..02a40c49b0 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ Build Type **IBM ppc64le GPU** Nightly | [![Build Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/) **IBM ppc64le GPU** Stable Release | [![Build 
Status](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/badge/icon)](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | [Release](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) **Linux CPU with Intel® MKL-DNN** Nightly | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) -**Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.4
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.11.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp27-cp27mu-linux_x86_64.whl)
[1.11.0 py3.4](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp34-cp34m-linux_x86_64.whl)
[1.11.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp35-cp35m-linux_x86_64.whl)
[1.11.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.11.0-cp36-cp36m-linux_x86_64.whl) +**Linux CPU with Intel® MKL-DNN** Python 2.7
**Linux CPU with Intel® MKL-DNN** Python 3.4
**Linux CPU with Intel® MKL-DNN** Python 3.5
**Linux CPU with Intel® MKL-DNN** Python 3.6 | [![Build Status](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/badge/icon)](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.12.0 py2.7](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp27-cp27mu-linux_x86_64.whl)
[1.12.0 py3.4](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp34-cp34m-linux_x86_64.whl)
[1.12.0 py3.5](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp35-cp35m-linux_x86_64.whl)
[1.12.0 py3.6](https://storage.googleapis.com/intel-optimized-tensorflow/tensorflow-1.12.0-cp36-cp36m-linux_x86_64.whl) ## For more information * [TensorFlow Website](https://www.tensorflow.org) -- GitLab From 0be39e80a210919c1dd3223b4b0ac23380372940 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Thu, 29 Nov 2018 09:15:50 +0800 Subject: [PATCH 0195/1078] Update tensorflow/core/graph/mkl_layout_pass.cc Use "DCHECK()" instead of "CHECK_EQ". Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index bac434886f..e6c24ab8d4 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -949,7 +949,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { e->dst_input() == kPermTensorIndex) { // we find the "perm" node, now try to retrieve its value. const TensorProto* proto = nullptr; - CHECK_EQ(GetNodeAttr(perm_node->def(), "value", &proto).ok(), true); + DCHECK(GetNodeAttr(perm_node->def(), "value", &proto).ok()); DataType type; GetNodeAttr(perm_node->def(), "dtype", &type); -- GitLab From 41d41c9730e87e2b98ae24a447f233466d7995af Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Thu, 29 Nov 2018 09:16:49 +0800 Subject: [PATCH 0196/1078] Update tensorflow/core/graph/mkl_layout_pass.cc Use "DCHECK()" instead of "CHECK_EQ". 
Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index e6c24ab8d4..55c337cbea 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2694,7 +2694,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( // Create node. Node* new_node; TF_CHECK_OK(nb.Finalize(&**g, &new_node)); - CHECK_NOTNULL(new_node); + DCHECK(new_node); // Fill outputs. for (const Edge* e : transpose_to_nchw->out_edges()) { -- GitLab From d95a04c30511992befcd66dc12f3227fa65891e4 Mon Sep 17 00:00:00 2001 From: Penporn Koanantakool <38085909+penpornk@users.noreply.github.com> Date: Thu, 29 Nov 2018 09:17:09 +0800 Subject: [PATCH 0197/1078] Update tensorflow/core/graph/mkl_layout_pass.cc Use "DCHECK()" instead of "CHECK_EQ". Co-Authored-By: wenxizhu --- tensorflow/core/graph/mkl_layout_pass.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 55c337cbea..edea296c3b 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2700,7 +2700,7 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( for (const Edge* e : transpose_to_nchw->out_edges()) { if (!e->IsControlEdge()) { const int kTransposeWithMklOpOutputSlot = 0; - CHECK_NOTNULL((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, + DCHECK((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, e->dst(), e->dst_input())); } } -- GitLab From f3db4e300b782afbff93223777ed1b5cd3f1f7cd Mon Sep 17 00:00:00 2001 From: Yuxin Wu Date: Wed, 28 Nov 2018 21:56:27 -0800 Subject: [PATCH 0198/1078] Fix deprecated use of `sparse_to_dense`. 
Calling `sparse_to_dense` gives a deprecation warning that asks users to use `sparse.to_dense`: https://github.com/tensorflow/tensorflow/blob/71f40f044450736cd6acd29e92ffbfc0e571ee14/tensorflow/python/ops/sparse_ops.py#L952-L955 However, `sparse.to_dense` calls `sparse_to_dense`, which again produces the deprecation warning. --- tensorflow/python/ops/sparse_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 245080cb26..914616ff6d 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -1422,7 +1422,7 @@ def sparse_tensor_to_dense(sp_input, """ sp_input = _convert_to_sparse_tensor(sp_input) - return sparse_to_dense( + return gen_sparse_ops.sparse_to_dense( sp_input.indices, sp_input.dense_shape, sp_input.values, -- GitLab From 0c7a31a168aae02e323c97bc6b81f2d3f19cbb2a Mon Sep 17 00:00:00 2001 From: Nutti Date: Thu, 29 Nov 2018 22:32:54 +0900 Subject: [PATCH 0199/1078] Fix: clang-format error --- tensorflow/core/kernels/partitioned_function_ops.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 72310f33ae..8aac78f56f 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -179,10 +179,9 @@ class PartitionedCallOp : public AsyncOpKernel { done); OP_REQUIRES_OK_ASYNC( - ctx, - OptimizationPassRegistry::Global()->RunGrouping( - OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, - optimization_options), + ctx, OptimizationPassRegistry::Global()->RunGrouping( + OptimizationPassRegistry::POST_REWRITE_FOR_EXEC, + optimization_options), done); std::unordered_map> subgraphs; -- GitLab From d00013e72cd3a1a4805395eb8e66748dcf387295 Mon Sep 17 00:00:00 2001 From: Wen yun Date: Wed, 31 Oct 2018 15:22:14 +0800 Subject: [PATCH 0200/1078] fix 
the case when input value are MirroredVariable for assign_moving_average --- .../distribute/python/moving_averages_test.py | 20 +++++++++++++++++++ .../python/distribute/mirrored_strategy.py | 3 +++ 2 files changed, 23 insertions(+) diff --git a/tensorflow/contrib/distribute/python/moving_averages_test.py b/tensorflow/contrib/distribute/python/moving_averages_test.py index c492d8bafc..da3353b2d5 100644 --- a/tensorflow/contrib/distribute/python/moving_averages_test.py +++ b/tensorflow/contrib/distribute/python/moving_averages_test.py @@ -139,6 +139,26 @@ class AssignMovingAveragesTest(test.TestCase, parameterized.TestCase): (2.0 * 0.25 + 0.0) / (1.0 * 0.25 + 1.0)], var.eval()) + @combinations.generate(all_combinations) + def testAssignVariable(self, distribution): + def replica_fn(): + var = variables.Variable([10.0, 11.0]) + # Here we expect to check the case when input value are variable. + val = variables.Variable([1., 2.]) + decay = 0.25 + assign = moving_averages.assign_moving_average( + var, val, decay, zero_debias=False) + return var, assign + + with distribution.scope(), self.cached_session() as sess: + var, assign = distribution.call_for_each_replica(replica_fn) + variables.global_variables_initializer().run() + self.assertAllClose([10.0, 11.0], var.eval()) + sess.run(distribution.unwrap(assign)) + self.assertAllClose( + [10 * 0.25 + 1. * (1 - 0.25), + 11 * 0.25 + 2. 
* (1 - 0.25)], + var.eval()) if __name__ == "__main__": test.main() diff --git a/tensorflow/python/distribute/mirrored_strategy.py b/tensorflow/python/distribute/mirrored_strategy.py index d6d40df5ce..3cd5cf09c0 100644 --- a/tensorflow/python/distribute/mirrored_strategy.py +++ b/tensorflow/python/distribute/mirrored_strategy.py @@ -598,6 +598,9 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended): return self._cross_device_ops def _reduce_to(self, reduce_op, value, destinations): + if (isinstance(value, values.Mirrored) and + reduce_op == reduce_util.ReduceOp.MEAN): + return value assert not isinstance(value, values.Mirrored) if not isinstance(value, values.DistributedValues): # This function handles reducing values that are not PerReplica or -- GitLab From 675f415603ad0bb0217459672fffbbe798e01d84 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Fri, 24 Aug 2018 17:01:55 -0700 Subject: [PATCH 0201/1078] Upgrade Dockerfile assembler system This is a big upgrade to the Dockerfile assembler I wrote a couple of months ago. The spec has changed, the script has been rewritten, and there are new features throughout: - The assembler can build and upload images to Docker Hub. - The assembler can also run tests (!), although the testing system is extremely rudimentary. It could be expanded with parallelism later, if execution time becomes a problem. - spec.yml is totally different, and now defines both dockerfiles and images. It handles the combinatorial explosion of multiple optional features without excessive duplication, unlike the previous spec format. - Partials are the same, but I dumped the extensive dockerfile documentation support because I don't think anyone would have used it. - Dockerfiles are handled under the same kind of system as images, which is neat. The new Dockerfiles aren't so duplicated. - I've upgraded the images with new tensorflow tutorial files (jupyter only) and fixed some others that didn't actually work. 
- I've improved the development documentation by suggesting aliases. - Added "static-dockerfiles" directory to track independent Dockerfiles. These changes should better support changes like #23194. --- tensorflow/tools/dockerfiles/.gitignore | 1 + tensorflow/tools/dockerfiles/README.md | 49 +- tensorflow/tools/dockerfiles/assembler.py | 904 ++++++++++-------- .../dockerfiles/cpu-devel-jupyter.Dockerfile | 63 +- .../dockerfiles/cpu-devel.Dockerfile | 46 +- .../dockerfiles/cpu-jupyter.Dockerfile | 52 +- .../dockerfiles/dockerfiles/cpu.Dockerfile | 35 +- ...ockerfile => gpu-devel-jupyter.Dockerfile} | 75 +- ...-devel.Dockerfile => gpu-devel.Dockerfile} | 58 +- ...yter.Dockerfile => gpu-jupyter.Dockerfile} | 61 +- .../{nvidia.Dockerfile => gpu.Dockerfile} | 44 +- .../partials/jupyter.partial.Dockerfile | 15 +- .../partials/tensorflow.partial.Dockerfile | 7 +- .../partials/test-import.partial.Dockerfile | 0 .../partials/ubuntu.partial.Dockerfile | 2 - .../{ => ubuntu}/bazel.partial.Dockerfile | 14 + .../cpu-devel.partial.Dockerfile} | 7 +- .../partials/ubuntu/cpu.partial.Dockerfile | 1 + .../nvidia-devel.partial.Dockerfile | 18 +- .../{ => ubuntu}/nvidia.partial.Dockerfile | 8 +- .../{ => ubuntu}/python.partial.Dockerfile | 5 +- .../ubuntu/test-devel.partial.Dockerfile | 0 .../ubuntu/version.partial.Dockerfile | 1 + tensorflow/tools/dockerfiles/spec.yml | 320 +++---- .../tools/dockerfiles/tests/build-cpu.sh | 22 + .../tools/dockerfiles/tests/build-gpu.sh | 20 + .../tools/dockerfiles/tests/import-gpu.sh | 2 + tensorflow/tools/dockerfiles/tests/import.sh | 3 + ...{assembler.Dockerfile => tools.Dockerfile} | 5 +- 29 files changed, 1018 insertions(+), 820 deletions(-) create mode 100644 tensorflow/tools/dockerfiles/.gitignore rename tensorflow/tools/dockerfiles/dockerfiles/{nvidia-devel-jupyter.Dockerfile => gpu-devel-jupyter.Dockerfile} (67%) rename tensorflow/tools/dockerfiles/dockerfiles/{nvidia-devel.Dockerfile => gpu-devel.Dockerfile} (76%) rename 
tensorflow/tools/dockerfiles/dockerfiles/{nvidia-jupyter.Dockerfile => gpu-jupyter.Dockerfile} (63%) rename tensorflow/tools/dockerfiles/dockerfiles/{nvidia.Dockerfile => gpu.Dockerfile} (69%) create mode 100644 tensorflow/tools/dockerfiles/partials/test-import.partial.Dockerfile delete mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile rename tensorflow/tools/dockerfiles/partials/{ => ubuntu}/bazel.partial.Dockerfile (58%) rename tensorflow/tools/dockerfiles/partials/{ubuntu-devel.partial.Dockerfile => ubuntu/cpu-devel.partial.Dockerfile} (86%) create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu/cpu.partial.Dockerfile rename tensorflow/tools/dockerfiles/partials/{ => ubuntu}/nvidia-devel.partial.Dockerfile (78%) rename tensorflow/tools/dockerfiles/partials/{ => ubuntu}/nvidia.partial.Dockerfile (78%) rename tensorflow/tools/dockerfiles/partials/{ => ubuntu}/python.partial.Dockerfile (66%) create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu/test-devel.partial.Dockerfile create mode 100644 tensorflow/tools/dockerfiles/partials/ubuntu/version.partial.Dockerfile create mode 100755 tensorflow/tools/dockerfiles/tests/build-cpu.sh create mode 100755 tensorflow/tools/dockerfiles/tests/build-gpu.sh create mode 100755 tensorflow/tools/dockerfiles/tests/import-gpu.sh create mode 100755 tensorflow/tools/dockerfiles/tests/import.sh rename tensorflow/tools/dockerfiles/{assembler.Dockerfile => tools.Dockerfile} (95%) diff --git a/tensorflow/tools/dockerfiles/.gitignore b/tensorflow/tools/dockerfiles/.gitignore new file mode 100644 index 0000000000..d7efa472a9 --- /dev/null +++ b/tensorflow/tools/dockerfiles/.gitignore @@ -0,0 +1 @@ +dockerfiles/*.temp.Dockerfile diff --git a/tensorflow/tools/dockerfiles/README.md b/tensorflow/tools/dockerfiles/README.md index 7c8ca1d1c7..2ac68666d0 100644 --- a/tensorflow/tools/dockerfiles/README.md +++ b/tensorflow/tools/dockerfiles/README.md @@ -1,8 +1,12 @@ # TensorFlow Dockerfiles -This 
directory houses TensorFlow's Dockerfiles. **DO NOT EDIT THE DOCKERFILES -MANUALLY!** They are maintained by `assembler.py`, which builds Dockerfiles from -the files in `partials/` and the rules in `spec.yml`. See [the Contributing +This directory houses TensorFlow's Dockerfiles and the infrastructure used to +create and deploy them to [Docker +Hub](https://hub.docker.com/r/tensorflow/tensorflow). + +**DO NOT EDIT THE DOCKERFILES/ DIRECTORY MANUALLY!** The files within are +maintained by `assembler.py`, which builds Dockerfiles from the files in +`partials/` and the rules in `spec.yml`. See [the Contributing section](#contributing) for more information. These Dockerfiles are planned to replace the Dockerfiles used to generate @@ -20,10 +24,10 @@ $ docker build -f ./dockerfiles/cpu.Dockerfile -t tf . Each Dockerfile has its own set of available `--build-arg`s which are documented in the Dockerfile itself. -## Running +## Running Locally Built Images After building the image with the tag `tf` (for example), use `docker run` to -run the images. Examples are below. +run the images. Note for new Docker users: the `-v` and `-u` flags share directories between the Docker container and your machine, and very important. Without @@ -42,8 +46,10 @@ $ docker run -u $(id -u):$(id -g) -v $(pwd):/my-devel -it tf # GPU-based images (set up nvidia-docker2 first) $ docker run --runtime=nvidia -u $(id -u):$(id -g) -v $(pwd):/my-devel -it tf -# Images with Jupyter run on port 8888, and needs a volume for notebooks -$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(pwd):/notebooks -it tf +# Images with Jupyter run on port 8888 and need a volume for your notebooks +# You can change $(PWD) to the full path to a directory if your notebooks +# live outside the current directory. 
+$ docker run --user $(id -u):$(id -g) -p 8888:8888 -v $(PWD):/tf/notebooks -it tf ``` These images do not come with the TensorFlow source code -- but the development @@ -60,11 +66,32 @@ You can use the `Dockerfile` in this directory to build an editing environment that has all of the Python dependencies you'll need: ```bash -$ docker build -t tf-assembler -f assembler.Dockerfile . +# Build the tools-helper image so you can run the assembler +$ docker build -t tf-tools -f tools.Dockerfile . # Set --user to set correct permissions on generated files -$ docker run --user $(id -u):$(id -g) -it -v $(pwd):/tf tf-assembler bash +$ docker run --user $(id -u):$(id -g) -it -v $(pwd):/tf tf-tools bash + +# Next you can make a handy alias depending on what you're doing. When building +# Docker images, you need to run as root with docker.sock mounted so that the +# container can run Docker commands. When assembling Dockerfiles, though, you'll +# want to run as your user so that new files have the right permissions. + +# If you're BUILDING OR DEPLOYING DOCKER IMAGES, run as root with docker.sock: +$ alias asm_images="docker run --rm -v $(pwd):/tf -v /var/run/docker.sock:/var/run/docker.sock tf-tools python3 assembler.py " + +# If you're REBUILDING OR ADDING DOCKERFILES, remove docker.sock and add -u: +$ alias asm_dockerfiles="docker run --rm -u $(id -u):$(id -g) -v $(pwd):/tf tf-tools python3 assembler.py " + +# Check flags +$ asm_dockerfiles --help + +# Assemble all of the Dockerfiles +$ asm_dockerfiles --release ubuntu-dockerfiles --construct_dockerfiles + +# Build all of the "nightly" images on your local machine: +$ asm_images --release nightly --build_images -# In the container... 
-/tf $ python3 ./assembler.py -o dockerfiles -s spec.yml +# Build version release for version 99.0, except "gpu" tags: +$ asm_images --release versioned --arg _TAG_PREFIX=99.0 --build_images --exclude_tags_matching '*.gpu.*' ``` diff --git a/tensorflow/tools/dockerfiles/assembler.py b/tensorflow/tools/dockerfiles/assembler.py index 9cdd9bb0cb..8d97e1d7dc 100644 --- a/tensorflow/tools/dockerfiles/assembler.py +++ b/tensorflow/tools/dockerfiles/assembler.py @@ -1,73 +1,140 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Assemble common TF Dockerfiles from many parts. - -This script constructs TF's Dockerfiles by aggregating partial -Dockerfiles. See README.md for usage examples. +"""Multipurpose TensorFlow Docker Helper. + +- Assembles Dockerfiles +- Builds images (and optionally runs image tests) +- Pushes images to Docker Hub (provided with credentials) + +Read README.md (in this directory) for instructions! 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import copy import errno +import itertools +import multiprocessing import os -import os.path import re import shutil -import textwrap +import sys from absl import app from absl import flags import cerberus +import docker import yaml FLAGS = flags.FLAGS +flags.DEFINE_string('hub_username', None, + 'Dockerhub username, only used with --upload_to_hub') + +flags.DEFINE_string( + 'hub_password', None, + ('Dockerhub password, only used with --upload_to_hub. Use from an env param' + 'so your password isn\'t in your history.')) + +flags.DEFINE_integer('hub_timeout', 3600, + 'Abort Hub upload if it takes longer than this.') + +flags.DEFINE_string( + 'repository', 'tensorflow', + 'Tag local images as {repository}:tag (in addition to the ' + 'hub_repository, if uploading to hub)') + +flags.DEFINE_string( + 'hub_repository', None, + 'Push tags to this Docker Hub repository, e.g. tensorflow/tensorflow') + +flags.DEFINE_boolean( + 'upload_to_hub', + False, + ('Push built images to Docker Hub (you must also provide --hub_username, ' + '--hub_password, and --hub_repository)'), + short_name='u', +) + +flags.DEFINE_boolean( + 'construct_dockerfiles', False, 'Do not build images', short_name='d') + flags.DEFINE_boolean( - 'dry_run', False, 'Do not actually generate Dockerfiles', short_name='n') + 'keep_temp_dockerfiles', + False, + 'Retain .temp.Dockerfiles created while building images.', + short_name='k') + +flags.DEFINE_boolean( + 'build_images', False, 'Do not build images', short_name='b') flags.DEFINE_string( - 'spec_file', - './spec.yml', - 'Path to a YAML specification file', - short_name='s') + 'run_tests_path', None, + ('Execute test scripts on generated Dockerfiles before pushing them. ' + 'Flag value must be a full path to the "tests" directory, which is usually' + ' $(realpath ./tests). 
A failed tests counts the same as a failed build.')) + +flags.DEFINE_boolean( + 'stop_on_failure', False, + ('Stop processing tags if any one build fails. If False or not specified, ' + 'failures are reported but do not affect the other images.')) + +flags.DEFINE_boolean( + 'dry_run', + False, + 'Do not build or deploy anything at all.', + short_name='n', +) + +flags.DEFINE_string( + 'exclude_tags_matching', + None, + ('Regular expression that skips processing on any tag it matches. Must ' + 'match entire string, e.g. ".*gpu.*" ignores all GPU tags.'), + short_name='x') + +flags.DEFINE_string( + 'only_tags_matching', + None, + ('Regular expression that skips processing on any tag it does not match. ' + 'Must match entire string, e.g. ".*gpu.*" includes only GPU tags.'), + short_name='i') flags.DEFINE_string( - 'output_dir', - './dockerfiles', ('Path to an output directory for Dockerfiles. ' - 'Will be created if it doesn\'t exist.'), + 'dockerfile_dir', + './dockerfiles', 'Path to an output directory for Dockerfiles.' + ' Will be created if it doesn\'t exist.' + ' Existing files in this directory will be deleted when new Dockerfiles' + ' are made.', short_name='o') flags.DEFINE_string( 'partial_dir', './partials', - 'Path to a directory containing foo.partial.Dockerfile partial files.', + 'Path to a directory containing foo.partial.Dockerfile partial files.' + ' can have subdirectories, e.g. "bar/baz.partial.Dockerfile".', short_name='p') -flags.DEFINE_boolean( - 'quiet_dry_run', - True, - 'Do not print contents of dry run Dockerfiles.', - short_name='q') +flags.DEFINE_multi_string( + 'release', [], + 'Set of releases to build and tag. Defaults to every release type.', + short_name='r') -flags.DEFINE_boolean( - 'validate', True, 'Validate generated Dockerfiles', short_name='c') +flags.DEFINE_multi_string( + 'arg', [], + ('Extra build arguments. These are used for expanding tag names if needed ' + '(e.g. 
--arg _TAG_PREFIX=foo) and for using as build arguments (unused ' + 'args will print a warning).'), + short_name='a') + +flags.DEFINE_string( + 'spec_file', + './spec.yml', + 'Path to the YAML specification file', + short_name='s') -# Schema to verify the contents of spec.yml with Cerberus. +# Schema to verify the contents of tag-spec.yml with Cerberus. # Must be converted to a dict from yaml to work. # Note: can add python references with e.g. # !!python/name:builtins.str @@ -76,478 +143,513 @@ SCHEMA_TEXT = """ header: type: string -partials: +slice_sets: type: dict keyschema: type: string valueschema: - type: dict - schema: - desc: - type: string - args: + type: list + schema: type: dict - keyschema: - type: string - valueschema: - anyof: - - type: [ boolean, number, string ] - - type: dict - schema: - default: - type: [ boolean, number, string ] - desc: - type: string - options: - type: list - schema: - type: string - -images: + schema: + add_to_name: + type: string + dockerfile_exclusive_name: + type: string + partials: + type: list + schema: + type: string + ispartial: true + test_runtime: + type: string + required: false + tests: + type: list + default: [] + schema: + type: string + args: + type: list + default: [] + schema: + type: string + isfullarg: true + +releases: + type: dict keyschema: type: string valueschema: type: dict schema: - desc: - type: string - arg-defaults: - type: list - schema: - anyof: - - type: dict - keyschema: - type: string - arg_in_use: true - valueschema: - type: string - - type: string - isimage: true - create-dockerfile: + is_dockerfiles: type: boolean - partials: + required: false + default: false + upload_images: + type: boolean + required: false + default: true + tag_specs: type: list + required: true schema: - anyof: - - type: dict - keyschema: - type: string - regex: image - valueschema: - type: string - isimage: true - - type: string - ispartial: true + type: string """ -class TfDockerValidator(cerberus.Validator): - 
"""Custom Cerberus validator for TF dockerfile spec. +class TfDockerTagValidator(cerberus.Validator): + """Custom Cerberus validator for TF tag spec. Note: Each _validate_foo function's docstring must end with a segment describing its own validation schema, e.g. "The rule's arguments are...". If you add a new validator, you can copy/paste that section. """ - def _validate_ispartial(self, ispartial, field, value): - """Validate that a partial references an existing partial spec. + def __init__(self, *args, **kwargs): + # See http://docs.python-cerberus.org/en/stable/customize.html + if 'partials' in kwargs: + self.partials = kwargs['partials'] + super(cerberus.Validator, self).__init__(*args, **kwargs) + + def _validate_isvariant(self, isvariant, field, value): + """Validate that a variant references an existing variant spec. Args: - ispartial: Value of the rule, a bool + isvariant: Value of the rule, a bool field: The field being validated value: The field's value - The rule's arguments are validated against this schema: {'type': 'boolean'} """ - if ispartial and value not in self.root_document.get('partials', dict()): - self._error(field, '{} is not an existing partial.'.format(value)) + if isvariant and value not in self.root_document.get('variants', dict()): + self._error(field, '{} is not an existing variant.'.format(value)) - def _validate_isimage(self, isimage, field, value): - """Validate that an image references an existing partial spec. + def _validate_ispartial(self, ispartial, field, value): + """Validate that a partial references an existing partial spec. 
Args: - isimage: Value of the rule, a bool + ispartial: Value of the rule, a bool field: The field being validated value: The field's value - The rule's arguments are validated against this schema: {'type': 'boolean'} """ - if isimage and value not in self.root_document.get('images', dict()): - self._error(field, '{} is not an existing image.'.format(value)) + if ispartial and value not in self.partials: + self._error(field, + '{} is not present in the partials directory.'.format(value)) - def _validate_arg_in_use(self, arg_in_use, field, value): - """Validate that an arg references an existing partial spec's args. + def _validate_isfullarg(self, isfullarg, field, value): + """Validate that a string is either a FULL=arg or NOT. Args: - arg_in_use: Value of the rule, a bool + isfullarg: Value of the rule, a bool field: The field being validated value: The field's value - The rule's arguments are validated against this schema: {'type': 'boolean'} """ - if arg_in_use: - for partial in self.root_document.get('partials', dict()).values(): - if value in partial.get('args', tuple()): - return + if isfullarg and '=' not in value: + self._error(field, '{} should be of the form ARG=VALUE.'.format(value)) + if not isfullarg and '=' in value: + self._error(field, '{} should be of the form ARG (no =).'.format(value)) - self._error(field, '{} is not an arg used in any partial.'.format(value)) +def eprint(*args, **kwargs): + print(*args, file=sys.stderr, flush=True, **kwargs) -def build_partial_description(partial_spec): - """Create the documentation lines for a specific partial. - Generates something like this: +def aggregate_all_slice_combinations(spec, slice_set_names): + """Figure out all of the variant groupings for a spec.""" + slice_sets = copy.deepcopy(spec['slice_sets']) - # This is the partial's description, from spec.yml. - # --build-arg ARG_NAME=argdefault - # this is one of the args. - # --build-arg ANOTHER_ARG=(some|choices) - # another arg. 
+ for name in slice_set_names: + for slice_set in slice_sets[name]: + slice_set['set_name'] = name - Args: - partial_spec: A dict representing one of the partials from spec.yml. Doesn't - include the name of the partial; is a dict like { desc: ..., args: ... }. + slices_grouped_but_not_keyed = [slice_sets[name] for name in slice_set_names] + all_slice_combos = list(itertools.product(*slices_grouped_but_not_keyed)) + return all_slice_combos - Returns: - A commented string describing this partial. - """ - # Start from linewrapped desc field - lines = [] - wrapper = textwrap.TextWrapper( - initial_indent='# ', subsequent_indent='# ', width=80) - description = wrapper.fill(partial_spec.get('desc', '( no comments )')) - lines.extend(['#', description]) - - # Document each arg - for arg, arg_data in partial_spec.get('args', dict()).items(): - # Wrap arg description with comment lines - desc = arg_data.get('desc', '( no description )') - desc = textwrap.fill( - desc, - initial_indent='# ', - subsequent_indent='# ', - width=80, - drop_whitespace=False) - - # Document (each|option|like|this) - if 'options' in arg_data: - arg_options = ' ({})'.format('|'.join(arg_data['options'])) - else: - arg_options = '' +def build_name_from_slices(format_string, slices, args, is_dockerfile=False): + """Build the tag name (cpu-devel...) 
from a list of slices.""" + name_formatter = copy.deepcopy(args) + name_formatter.update({s['set_name']: s['add_to_name'] for s in slices}) + name_formatter.update({ + s['set_name']: s['dockerfile_exclusive_name'] + for s in slices + if is_dockerfile and 'dockerfile_exclusive_name' in s + }) + name = format_string.format(**name_formatter) + return name - # Add usage sample - arg_use = '# --build-arg {}={}{}'.format(arg, - arg_data.get('default', '(unset)'), - arg_options) - lines.extend([arg_use, desc]) - return '\n'.join(lines) +def update_args_dict(args_dict, updater): + """Update a dict of arg values with more values from a list or dict.""" + if isinstance(updater, list): + for arg in updater: + key, sep, value = arg.partition('=') + if sep == '=': + args_dict[key] = value + if isinstance(updater, dict): + for key, value in updater.items(): + args_dict[key] = value + return args_dict -def construct_contents(partial_specs, image_spec): - """Assemble the dockerfile contents for an image spec. +def get_slice_sets_and_required_args(slice_sets, tag_spec): + """Extract used-slice-sets and required CLI arguments from a spec string. - It assembles a concrete list of partial references into a single, large - string. - Also expands argument defaults, so that the resulting Dockerfile doesn't have - to be configured with --build-arg=... every time. That is, any ARG directive - will be updated with a new default value. + For example, {FOO}{bar}{bat} finds FOO, bar, and bat. Assuming bar and bat + are both named slice sets, FOO must be specified on the command line. Args: - partial_specs: The dict from spec.yml["partials"]. - image_spec: One of the dict values from spec.yml["images"]. + slice_sets: Dict of named slice sets + tag_spec: The tag spec string, e.g. {_FOO}{blep} Returns: - A string containing a valid Dockerfile based on the partials listed in - image_spec. 
+ (used_slice_sets, required_args), a tuple of lists """ - processed_partial_strings = [] - for partial_name in image_spec['partials']: - # Apply image arg-defaults to existing arg defaults - partial_spec = copy.deepcopy(partial_specs[partial_name]) - args = partial_spec.get('args', dict()) - for k_v in image_spec.get('arg-defaults', []): - arg, value = list(k_v.items())[0] - if arg in args: - args[arg]['default'] = value - - # Read partial file contents - filename = partial_spec.get('file', partial_name) - partial_path = os.path.join(FLAGS.partial_dir, - '{}.partial.Dockerfile'.format(filename)) - with open(partial_path, 'r') as f_partial: - partial_contents = f_partial.read() - - # Replace ARG FOO=BAR with ARG FOO=[new-default] - for arg, arg_data in args.items(): - if 'default' in arg_data and arg_data['default']: - default = '={}'.format(arg_data['default']) - else: - default = '' - partial_contents = re.sub(r'ARG {}.*'.format(arg), 'ARG {}{}'.format( - arg, default), partial_contents) - - # Store updated partial contents - processed_partial_strings.append(partial_contents) - - # Join everything together - return '\n'.join(processed_partial_strings) - - -def mkdir_p(path): - """Create a directory and its parents, even if it already exists.""" - try: - os.makedirs(path) - except OSError as e: - if e.errno != errno.EEXIST: - raise - - -def construct_documentation(header, partial_specs, image_spec): - """Assemble all of the documentation for a single dockerfile. - - Builds explanations of included partials and available build args. - - Args: - header: The string from spec.yml["header"]; will be commented and wrapped. - partial_specs: The dict from spec.yml["partials"]. - image_spec: The spec for the dockerfile being built. - - Returns: - A string containing a commented header that documents the contents of the - dockerfile. 
- - """ - # Comment and wrap header and image description - commented_header = '\n'.join( - [('# ' + l).rstrip() for l in header.splitlines()]) - commented_desc = '\n'.join( - ['# ' + l for l in image_spec.get('desc', '').splitlines()]) - partial_descriptions = [] - - # Build documentation for each partial in the image - for partial in image_spec['partials']: - # Copy partial data for default args unique to this image - partial_spec = copy.deepcopy(partial_specs[partial]) - args = partial_spec.get('args', dict()) - - # Overwrite any existing arg defaults - for k_v in image_spec.get('arg-defaults', []): - arg, value = list(k_v.items())[0] - if arg in args: - args[arg]['default'] = value - - # Build the description from new args - partial_description = build_partial_description(partial_spec) - partial_descriptions.append(partial_description) - - contents = [commented_header, '#', commented_desc] + partial_descriptions - return '\n'.join(contents) + '\n' - - -def normalize_partial_args(partial_specs): - """Normalize the shorthand form of a partial's args specification. - - Turns this: - - partial: - args: - SOME_ARG: arg_value + required_args = [] + used_slice_sets = [] + + extract_bracketed_words = re.compile(r'\{([^}]+)\}') + possible_args_or_slice_set_names = extract_bracketed_words.findall(tag_spec) + for name in possible_args_or_slice_set_names: + if name in slice_sets: + used_slice_sets.append(name) + else: + required_args.append(name) - Into this: + return (used_slice_sets, required_args) - partial: - args: - SOME_ARG: - default: arg_value - Args: - partial_specs: The dict from spec.yml["partials"]. This dict is modified in - place. +def gather_tag_args(slices, cli_input_args, required_args): + """Build a dictionary of all the CLI and slice-specified args for a tag.""" + args = dict() - Returns: - The modified contents of partial_specs. 
- - """ - for _, partial in partial_specs.items(): - args = partial.get('args', dict()) - for arg, value in args.items(): - if not isinstance(value, dict): - new_value = {'default': value} - args[arg] = new_value - - return partial_specs + for s in slices: + args = update_args_dict(args, s['args']) + args = update_args_dict(args, cli_input_args) + for arg in required_args: + if arg not in args: + eprint(('> Error: {} is not a valid variant, and also isn\'t an arg ' + 'provided on the command line. If it is an arg, please specify ' + 'it with --arg. If not, check the variants list.'.format(arg))) + exit(1) -def flatten_args_references(image_specs): - """Resolve all default-args in each image spec to a concrete dict. + return args - Turns this: - example-image: - arg-defaults: - - MY_ARG: ARG_VALUE +def gather_slice_list_items(slices, key): + """For a list of slices, get the flattened list of all of a certain key.""" + return list(itertools.chain(*[s[key] for s in slices if key in s])) - another-example: - arg-defaults: - - ANOTHER_ARG: ANOTHER_VALUE - - example_image - Into this: +def find_first_slice_value(slices, key): + """For a list of slices, get the first value for a certain key.""" + for s in slices: + if key in s: + return s[key] - example-image: - arg-defaults: - - MY_ARG: ARG_VALUE - another-example: - arg-defaults: - - ANOTHER_ARG: ANOTHER_VALUE - - MY_ARG: ARG_VALUE +def assemble_tags(spec, cli_args, enabled_releases, all_partials): + """Gather all the tags based on our spec. Args: - image_specs: A dict of image_spec dicts; should be the contents of the - "images" key in the global spec.yaml. This dict is modified in place and - then returned. + spec: Nested dict containing full Tag spec + cli_args: List of ARG=foo arguments to pass along to Docker build + enabled_releases: List of releases to parse. Empty list = all + all_partials: Dict of every partial, for reference Returns: - The modified contents of image_specs. 
+ Dict of tags and how to build them """ - for _, image_spec in image_specs.items(): - too_deep = 0 - while str in map(type, image_spec.get('arg-defaults', [])) and too_deep < 5: - new_args = [] - for arg in image_spec['arg-defaults']: - if isinstance(arg, str): - new_args.extend(image_specs[arg]['arg-defaults']) - else: - new_args.append(arg) - - image_spec['arg-defaults'] = new_args - too_deep += 1 + tag_data = collections.defaultdict(list) - return image_specs + for name, release in spec['releases'].items(): + for tag_spec in release['tag_specs']: + if enabled_releases and name not in enabled_releases: + eprint('> Skipping release {}'.format(name)) + continue + used_slice_sets, required_cli_args = get_slice_sets_and_required_args( + spec['slice_sets'], tag_spec) -def flatten_partial_references(image_specs): - """Resolve all partial references in each image spec to a concrete list. + slice_combos = aggregate_all_slice_combinations(spec, used_slice_sets) + for slices in slice_combos: - Turns this: + tag_args = gather_tag_args(slices, cli_args, required_cli_args) + tag_name = build_name_from_slices(tag_spec, slices, tag_args, + release['is_dockerfiles']) + used_partials = gather_slice_list_items(slices, 'partials') + used_tests = gather_slice_list_items(slices, 'tests') + test_runtime = find_first_slice_value(slices, 'test_runtime') + dockerfile_contents = merge_partials(spec['header'], used_partials, + all_partials) - example-image: - partials: - - foo + tag_data[tag_name].append({ + 'release': name, + 'tag_spec': tag_spec, + 'is_dockerfiles': release['is_dockerfiles'], + 'upload_images': release['upload_images'], + 'cli_args': tag_args, + 'partials': used_partials, + 'tests': used_tests, + 'test_runtime': test_runtime, + 'dockerfile_contents': dockerfile_contents, + }) - another-example: - partials: - - bar - - image: example-image - - bat + return tag_data - Into this: - example-image: - partials: - - foo +def merge_partials(header, used_partials, all_partials): 
+ """Merge all partial contents with their header.""" + used_partials = list(used_partials) + return '\n'.join([header] + [all_partials[u] for u in used_partials]) - another-example: - partials: - - bar - - foo - - bat - Args: - image_specs: A dict of image_spec dicts; should be the contents of the - "images" key in the global spec.yaml. This dict is modified in place and - then returned. - Returns: - The modified contents of image_specs. - """ - for _, image_spec in image_specs.items(): - too_deep = 0 - while dict in map(type, image_spec['partials']) and too_deep < 5: - new_partials = [] - for partial in image_spec['partials']: - if isinstance(partial, str): - new_partials.append(partial) - else: - new_partials.extend(image_specs[partial['image']]['partials']) +def upload_in_background(hub_repository, dock, image, tag): + """Upload a docker image (to be used by multiprocessing).""" + image.tag(hub_repository, tag=tag) + for line in list(dock.images.push(hub_repository, tag=tag, stream=True)): + print(line) - image_spec['partials'] = new_partials - too_deep += 1 - return image_specs +def mkdir_p(path): + """Create a directory and its parents, even if it already exists.""" + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise -def construct_dockerfiles(tf_spec): - """Generate a mapping of {"cpu": , ...}. +def gather_existing_partials(partial_path): + """Find and read all available partials. Args: - tf_spec: The full spec.yml loaded as a python object. + partial_path (string): read partials from this directory. Returns: - A string:string dict of short names ("cpu-devel") to Dockerfile contents. + Dict[string, string] of partial short names (like "ubuntu/python" or + "bazel") to the full contents of that partial. 
""" - names_to_contents = dict() - image_specs = tf_spec['images'] - image_specs = flatten_partial_references(image_specs) - image_specs = flatten_args_references(image_specs) - partial_specs = tf_spec['partials'] - partial_specs = normalize_partial_args(partial_specs) - - for name, image_spec in image_specs.items(): - if not image_spec.get('create-dockerfile', True): - continue - documentation = construct_documentation(tf_spec['header'], partial_specs, - image_spec) - contents = construct_contents(partial_specs, image_spec) - names_to_contents[name] = '\n'.join([documentation, contents]) - - return names_to_contents + partials = dict() + for path, _, files in os.walk(partial_path): + for name in files: + fullpath = os.path.join(path, name) + if '.partial.Dockerfile' not in fullpath: + eprint(('> Probably not a problem: skipping {}, which is not a ' + 'partial.').format(fullpath)) + continue + # partial_dir/foo/bar.partial.Dockerfile -> foo/bar + simple_name = fullpath[len(partial_path) + 1:-len('.partial.dockerfile')] + with open(fullpath, 'r') as f: + partial_contents = f.read() + partials[simple_name] = partial_contents + return partials def main(argv): if len(argv) > 1: - raise app.UsageError('Unexpected command line args found: {}'.format(argv)) + raise app.UsageError('Too many command-line arguments.') + # Read the full spec file, used for everything with open(FLAGS.spec_file, 'r') as spec_file: - tf_spec = yaml.load(spec_file) + tag_spec = yaml.load(spec_file) + + # Get existing partial contents + partials = gather_existing_partials(FLAGS.partial_dir) # Abort if spec.yaml is invalid - if FLAGS.validate: - schema = yaml.load(SCHEMA_TEXT) - v = TfDockerValidator(schema) - if not v.validate(tf_spec): - print('>> ERROR: {} is an invalid spec! 
The errors are:'.format( - FLAGS.spec_file)) - print(yaml.dump(v.errors, indent=2)) + schema = yaml.load(SCHEMA_TEXT) + v = TfDockerTagValidator(schema, partials=partials) + if not v.validate(tag_spec): + eprint('> Error: {} is an invalid spec! The errors are:'.format( + FLAGS.spec_file)) + eprint(yaml.dump(v.errors, indent=2)) + exit(1) + tag_spec = v.normalized(tag_spec) + + # Assemble tags and images used to build them + all_tags = assemble_tags(tag_spec, FLAGS.arg, FLAGS.release, partials) + + # Empty Dockerfile directory if building new Dockerfiles + if FLAGS.construct_dockerfiles: + eprint('> Emptying Dockerfile dir "{}"'.format(FLAGS.dockerfile_dir)) + shutil.rmtree(FLAGS.dockerfile_dir, ignore_errors=True) + mkdir_p(FLAGS.dockerfile_dir) + + # Set up Docker helper + dock = docker.from_env() + + # Login to Docker if uploading images + if FLAGS.upload_to_hub: + if not FLAGS.hub_username: + eprint('> Error: please set --hub_username when uploading to Dockerhub.') exit(1) - else: - print('>> WARNING: Not validating {}'.format(FLAGS.spec_file)) - - # Generate mapping of { "cpu-devel": "", ... 
} - names_to_contents = construct_dockerfiles(tf_spec) - - # Write each completed Dockerfile - if not FLAGS.dry_run: - print('>> Emptying destination dir "{}"'.format(FLAGS.output_dir)) - shutil.rmtree(FLAGS.output_dir, ignore_errors=True) - mkdir_p(FLAGS.output_dir) - else: - print('>> Skipping creation of {} (dry run)'.format(FLAGS.output_dir)) - for name, contents in names_to_contents.items(): - path = os.path.join(FLAGS.output_dir, name + '.Dockerfile') - if FLAGS.dry_run: - print('>> Skipping writing contents of {} (dry run)'.format(path)) - print(contents) - else: - mkdir_p(FLAGS.output_dir) - print('>> Writing {}'.format(path)) - with open(path, 'w') as f: - f.write(contents) + if not FLAGS.hub_repository: + eprint( + '> Error: please set --hub_repository when uploading to Dockerhub.') + exit(1) + if not FLAGS.hub_password: + eprint('> Error: please set --hub_password when uploading to Dockerhub.') + exit(1) + dock.login( + username=FLAGS.hub_username, + password=FLAGS.hub_password, + ) + + # Each tag has a name ('tag') and a definition consisting of the contents + # of its Dockerfile, its build arg list, etc. 
+ failed_tags = [] + for tag, tag_defs in all_tags.items(): + for tag_def in tag_defs: + eprint('> Working on {}'.format(tag)) + + if FLAGS.exclude_tags_matching and re.match(FLAGS.exclude_tags_matching, + tag): + eprint('>> Excluded due to match against "{}".'.format( + FLAGS.exclude_tags_matching)) + continue + + if FLAGS.only_tags_matching and not re.match(FLAGS.only_tags_matching, + tag): + eprint('>> Excluded due to failure to match against "{}".'.format( + FLAGS.only_tags_matching)) + continue + + # Write releases marked "is_dockerfiles" into the Dockerfile directory + if FLAGS.construct_dockerfiles: + path = os.path.join(FLAGS.dockerfile_dir, tag + '.Dockerfile') + if tag_def['is_dockerfiles']: + eprint('>> Writing {}...'.format(path)) + if not FLAGS.dry_run: + with open(path, 'w') as f: + f.write(tag_def['dockerfile_contents']) + + # Don't build any images for dockerfile-only releases + if not FLAGS.build_images: + continue + + # Generate a temporary Dockerfile to use to build, since docker-py + # needs a filepath relative to the build context (i.e. the current + # directory) + dockerfile = os.path.join(FLAGS.dockerfile_dir, tag + '.temp.Dockerfile') + if not FLAGS.dry_run: + with open(dockerfile, 'w') as f: + f.write(tag_def['dockerfile_contents']) + eprint('>> (Temporary) writing {}...'.format(dockerfile)) + + repo_tag = '{}:{}'.format(FLAGS.repository, tag) + eprint('>> Building {} using build args:'.format(repo_tag)) + for arg, value in tag_def['cli_args'].items(): + eprint('>>> {}={}'.format(arg, value)) + + # Note that we are NOT using cache_from, which appears to limit + # available cache layers to those from explicitly specified layers. Many + # of our layers are similar between local builds, so we want to use the + # implied local build cache. 
+ tag_failed = False + image, logs = None, [] + if not FLAGS.dry_run: + try: + image, logs = dock.images.build( + timeout=FLAGS.hub_timeout, + path='.', + dockerfile=dockerfile, + buildargs=tag_def['cli_args'], + tag=repo_tag) + + # Print logs after finishing + log_lines = [l.get('stream', '') for l in logs] + eprint(''.join(log_lines)) + + # Run tests if requested, and dump output + # Could be improved by backgrounding, but would need better + # multiprocessing support to track failures properly. + if FLAGS.run_tests_path: + if not tag_def['tests']: + eprint('>>> No tests to run.') + for test in tag_def['tests']: + eprint('>> Testing {}...'.format(test)) + container, = dock.containers.run( + image, + '/tests/' + test, + working_dir='/', + log_config={'type': 'journald'}, + detach=True, + stderr=True, + stdout=True, + volumes={FLAGS.run_tests_path: + {'bind': '/tests', 'mode': 'ro'}}, + runtime=tag_def['test_runtime']), + ret = container.wait() + code = ret['StatusCode'] + out = container.logs(stdout=True, stderr=False) + err = container.logs(stdout=False, stderr=True) + container.remove() + if out: + eprint('>>> Output stdout:') + eprint(out.decode('utf-8')) + else: + eprint('>>> No test standard out.') + if err: + eprint('>>> Output stderr:') + eprint(out.decode('utf-8')) + else: + eprint('>>> No test standard err.') + if code != 0: + eprint('>> {} failed tests with status: "{}"'.format( + repo_tag, code)) + failed_tags.append(tag) + tag_failed = True + if FLAGS.stop_on_failure: + eprint('>> ABORTING due to --stop_on_failure!') + exit(1) + else: + eprint('>> Tests look good!') + + except docker.errors.BuildError as e: + eprint('>> {} failed to build with message: "{}"'.format( + repo_tag, e.msg)) + eprint('>> Build logs follow:') + log_lines = [l.get('stream', '') for l in e.build_log] + eprint(''.join(log_lines)) + failed_tags.append(tag) + tag_failed = True + if FLAGS.stop_on_failure: + eprint('>> ABORTING due to --stop_on_failure!') + exit(1) + + # Clean 
temporary dockerfiles if they were created earlier + if not FLAGS.keep_temp_dockerfiles: + os.remove(dockerfile) + + # Upload new images to DockerHub as long as they built + passed tests + if FLAGS.upload_to_hub: + if not tag_def['upload_images']: + continue + if tag_failed: + continue + + eprint('>> Uploading to {}:{}'.format(FLAGS.hub_repository, tag)) + if not FLAGS.dry_run: + p = multiprocessing.Process( + target=upload_in_background, + args=(FLAGS.hub_repository, dock, image, tag)) + p.start() + + if failed_tags: + eprint( + '> Some tags failed to build or failed testing, check scrollback for ' + 'errors: {}'.format( + ','.join(failed_tags))) + exit(1) if __name__ == '__main__': diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile index dab7178db3..ecc8fabcce 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel-jupyter.Dockerfile @@ -16,27 +16,12 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, CPU-only environment for developing changes for TensorFlow, with Jupyter included. -# -# Start from Ubuntu, with TF development packages (no GPU support) -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the latest version of Bazel and Python development tools. -# -# Configure TensorFlow's shell prompt and login tools. -# -# Launch Jupyter on execution instead of a bash prompt. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} + +FROM ubuntu:${UBUNTU_VERSION} AS base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -48,7 +33,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ - python-dev \ rsync \ software-properties-common \ unzip \ @@ -59,8 +43,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + -ARG USE_PYTHON_3_NOT_2=True +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -72,10 +59,13 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + RUN apt-get update && apt-get install -y \ build-essential \ curl \ @@ -84,6 +74,20 @@ RUN apt-get update && apt-get install -y \ ${PYTHON}-dev \ swig +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + # Install bazel RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ @@ -93,11 +97,18 @@ RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8 COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc -RUN ${PIP} install jupyter +RUN ${PIP} install jupyter matplotlib -RUN mkdir /notebooks && chmod a+rwx /notebooks +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -WORKDIR /notebooks 
+RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf EXPOSE 8888 -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/notebooks --ip 0.0.0.0 --no-browser --allow-root"] +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile index 68566ccc8a..2f4a3d6beb 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu-devel.Dockerfile @@ -16,25 +16,12 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, CPU-only environment for developing changes for TensorFlow. -# -# Start from Ubuntu, with TF development packages (no GPU support) -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the latest version of Bazel and Python development tools. -# -# Configure TensorFlow's shell prompt and login tools. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} + +FROM ubuntu:${UBUNTU_VERSION} AS base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -46,7 +33,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ - python-dev \ rsync \ software-properties-common \ unzip \ @@ -57,8 +43,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + -ARG USE_PYTHON_3_NOT_2=True +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -70,10 +59,13 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + RUN apt-get update && apt-get install -y \ build-essential \ curl \ @@ -82,6 +74,20 @@ RUN apt-get update && apt-get install -y \ ${PYTHON}-dev \ swig +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + # Install bazel RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile index f889ed6f91..166e255289 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu-jupyter.Dockerfile @@ -16,31 +16,14 @@ # THIS IS A GENERATED DOCKERFILE. 
# # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, CPU-only environment for using TensorFlow, with Jupyter included. -# -# Start from Ubuntu (no GPU support) -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the TensorFlow Python package. -# --build-arg TF_PACKAGE=tensorflow (tensorflow|tensorflow-gpu|tf-nightly|tf-nightly-gpu) -# The specific TensorFlow Python package to install -# -# Configure TensorFlow's shell prompt and login tools. -# -# Launch Jupyter on execution instead of a bash prompt. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} -ARG USE_PYTHON_3_NOT_2=True +FROM ubuntu:${UBUNTU_VERSION} as base + +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -52,21 +35,36 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu ARG TF_PACKAGE=tensorflow RUN ${PIP} install ${TF_PACKAGE} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc -RUN ${PIP} install jupyter +RUN ${PIP} install jupyter matplotlib -RUN mkdir /notebooks && chmod a+rwx /notebooks +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -WORKDIR /notebooks +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials 
+RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf EXPOSE 8888 -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/notebooks --ip 0.0.0.0 --no-browser --allow-root"] +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile index 182a534bed..323f89155b 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/cpu.Dockerfile @@ -16,29 +16,14 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, CPU-only environment for using TensorFlow -# -# Start from Ubuntu (no GPU support) -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the TensorFlow Python package. -# --build-arg TF_PACKAGE=tensorflow (tensorflow|tensorflow-gpu|tf-nightly|tf-nightly-gpu) -# The specific TensorFlow Python package to install -# -# Configure TensorFlow's shell prompt and login tools. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} -ARG USE_PYTHON_3_NOT_2=True +FROM ubuntu:${UBUNTU_VERSION} as base + +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -50,10 +35,18 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu ARG TF_PACKAGE=tensorflow RUN ${PIP} install ${TF_PACKAGE} diff --git a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile similarity index 67% rename from tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel-jupyter.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile index 17faa84a68..b77ba52f25 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel-jupyter.Dockerfile @@ -16,28 +16,12 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, Nvidia-GPU-enabled environment for developing changes for TensorFlow, with Jupyter included. -# -# Start from Nvidia's Ubuntu base image with CUDA and CuDNN, with TF development -# packages. -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the latest version of Bazel and Python development tools. 
-# -# Configure TensorFlow's shell prompt and login tools. -# -# Launch Jupyter on execution instead of a bash prompt. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} + +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -60,6 +44,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + python-dev \ rsync \ software-properties-common \ unzip \ @@ -82,11 +67,19 @@ RUN mkdir /usr/local/cuda-9.0/lib && \ ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \ ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h -# TODO(tobyboyd): Remove after license is excluded from BUILD file. -RUN gunzip /usr/share/doc/libnccl2/NCCL-SLA.txt.gz && \ - cp /usr/share/doc/libnccl2/NCCL-SLA.txt /usr/local/cuda/ +# Configure the build for our CUDA configuration. 
+ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_NEED_TENSORRT 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 +ENV TF_CUDA_VERSION=9.0 +ENV TF_CUDNN_VERSION=7 + +# NCCL 2.x +ENV TF_NCCL_VERSION=2 -ARG USE_PYTHON_3_NOT_2=True +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -98,10 +91,13 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + RUN apt-get update && apt-get install -y \ build-essential \ curl \ @@ -110,6 +106,20 @@ RUN apt-get update && apt-get install -y \ ${PYTHON}-dev \ swig +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + # Install bazel RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ @@ -119,11 +129,18 @@ RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8 COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc -RUN ${PIP} install jupyter +RUN ${PIP} install jupyter matplotlib -RUN mkdir /notebooks && chmod a+rwx /notebooks +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -WORKDIR /notebooks +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf EXPOSE 8888 -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/notebooks --ip 0.0.0.0 --no-browser --allow-root"] +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile similarity index 76% rename from tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile index a3ba02a684..bcac1f7015 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-devel.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-devel.Dockerfile @@ -16,26 +16,12 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, Nvidia-GPU-enabled environment for developing changes for TensorFlow. -# -# Start from Nvidia's Ubuntu base image with CUDA and CuDNN, with TF development -# packages. -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the latest version of Bazel and Python development tools. -# -# Configure TensorFlow's shell prompt and login tools. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} + +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -58,6 +44,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + python-dev \ rsync \ software-properties-common \ unzip \ @@ -80,11 +67,19 @@ RUN mkdir /usr/local/cuda-9.0/lib && \ ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \ ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h -# TODO(tobyboyd): Remove after license is excluded from BUILD file. -RUN gunzip /usr/share/doc/libnccl2/NCCL-SLA.txt.gz && \ - cp /usr/share/doc/libnccl2/NCCL-SLA.txt /usr/local/cuda/ +# Configure the build for our CUDA configuration. +ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_NEED_TENSORRT 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 +ENV TF_CUDA_VERSION=9.0 +ENV TF_CUDNN_VERSION=7 + +# NCCL 2.x +ENV TF_NCCL_VERSION=2 -ARG USE_PYTHON_3_NOT_2=True +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -96,10 +91,13 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + RUN apt-get update && apt-get install -y \ build-essential \ curl \ @@ -108,6 +106,20 @@ RUN apt-get update && apt-get install -y \ ${PYTHON}-dev \ swig +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + # Install bazel RUN 
echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ diff --git a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-jupyter.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile similarity index 63% rename from tensorflow/tools/dockerfiles/dockerfiles/nvidia-jupyter.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile index fbdea4628a..9d7340abf3 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/nvidia-jupyter.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu-jupyter.Dockerfile @@ -16,30 +16,13 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow, with Jupyter included. -# -# NVIDIA with CUDA and CuDNN, no dev stuff -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the TensorFlow Python package. -# --build-arg TF_PACKAGE=tensorflow-gpu (tensorflow|tensorflow-gpu|tf-nightly|tf-nightly-gpu) -# The specific TensorFlow Python package to install -# -# Configure TensorFlow's shell prompt and login tools. -# -# Launch Jupyter on execution instead of a bash prompt. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:9.0-base-ubuntu16.04 +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base -# Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ cuda-command-line-tools-9-0 \ @@ -48,6 +31,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-curand-9-0 \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ + curl \ libcudnn7=7.2.1.38-1+cuda9.0 \ libnccl2=2.2.13-1+cuda9.0 \ libfreetype6-dev \ @@ -55,6 +39,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + rsync \ software-properties-common \ unzip \ && \ @@ -66,7 +51,10 @@ RUN apt-get update && \ apt-get update && \ apt-get install libnvinfer4=4.1.2-1+cuda9.0 -ARG USE_PYTHON_3_NOT_2=True +# For CUDA profiling, TensorFlow requires CUPTI. +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH + +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -78,21 +66,36 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools -ARG TF_PACKAGE=tensorflow-gpu +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow RUN ${PIP} install ${TF_PACKAGE} COPY bashrc /etc/bash.bashrc RUN chmod a+rwx /etc/bash.bashrc -RUN ${PIP} install jupyter +RUN ${PIP} install jupyter matplotlib -RUN mkdir /notebooks && chmod a+rwx /notebooks +RUN mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -WORKDIR /notebooks +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget 
https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf EXPOSE 8888 -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/notebooks --ip 0.0.0.0 --no-browser --allow-root"] +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/dockerfiles/nvidia.Dockerfile b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile similarity index 69% rename from tensorflow/tools/dockerfiles/dockerfiles/nvidia.Dockerfile rename to tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile index e0312dbc29..e8e6ceafe2 100644 --- a/tensorflow/tools/dockerfiles/dockerfiles/nvidia.Dockerfile +++ b/tensorflow/tools/dockerfiles/dockerfiles/gpu.Dockerfile @@ -16,28 +16,13 @@ # THIS IS A GENERATED DOCKERFILE. # # This file was assembled from multiple pieces, whose use is documented -# below. Please refer to the the TensorFlow dockerfiles documentation for -# more information. Build args are documented as their default value. -# -# Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow. -# -# NVIDIA with CUDA and CuDNN, no dev stuff -# --build-arg UBUNTU_VERSION=16.04 -# ( no description ) -# -# Python is required for TensorFlow and other libraries. -# --build-arg USE_PYTHON_3_NOT_2=True -# Install python 3 over Python 2 -# -# Install the TensorFlow Python package. -# --build-arg TF_PACKAGE=tensorflow-gpu (tensorflow|tensorflow-gpu|tf-nightly|tf-nightly-gpu) -# The specific TensorFlow Python package to install -# -# Configure TensorFlow's shell prompt and login tools. +# throughout. Please refer to the the TensorFlow dockerfiles documentation +# for more information. 
+ +ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:9.0-base-ubuntu16.04 +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base -# Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ cuda-command-line-tools-9-0 \ @@ -46,6 +31,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-curand-9-0 \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ + curl \ libcudnn7=7.2.1.38-1+cuda9.0 \ libnccl2=2.2.13-1+cuda9.0 \ libfreetype6-dev \ @@ -53,6 +39,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + rsync \ software-properties-common \ unzip \ && \ @@ -64,7 +51,10 @@ RUN apt-get update && \ apt-get update && \ apt-get install libnvinfer4=4.1.2-1+cuda9.0 -ARG USE_PYTHON_3_NOT_2=True +# For CUDA profiling, TensorFlow requires CUPTI. +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH + +ARG USE_PYTHON_3_NOT_2 ARG _PY_SUFFIX=${USE_PYTHON_3_NOT_2:+3} ARG PYTHON=python${_PY_SUFFIX} ARG PIP=pip${_PY_SUFFIX} @@ -76,11 +66,19 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools -ARG TF_PACKAGE=tensorflow-gpu +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python + +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow RUN ${PIP} install ${TF_PACKAGE} COPY bashrc /etc/bash.bashrc diff --git a/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile index 2c9b9f3f9a..51b22d1fdf 100644 --- a/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/jupyter.partial.Dockerfile @@ -1,8 +1,15 @@ -RUN ${PIP} install jupyter +RUN ${PIP} install jupyter matplotlib -RUN mkdir /notebooks && chmod a+rwx /notebooks +RUN 
mkdir -p /tf/tensorflow-tutorials && chmod -R a+rwx /tf/ RUN mkdir /.local && chmod a+rwx /.local -WORKDIR /notebooks +RUN apt-get install -y --no-install-recommends wget +WORKDIR /tf/tensorflow-tutorials +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_classification.ipynb +RUN wget https://raw.githubusercontent.com/tensorflow/docs/master/site/en/tutorials/keras/basic_text_classification.ipynb +RUN apt-get autoremove -y && apt-get remove -y wget +WORKDIR /tf EXPOSE 8888 -CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/notebooks --ip 0.0.0.0 --no-browser --allow-root"] +RUN ${PYTHON} -m ipykernel.kernelspec + +CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/tf --ip 0.0.0.0 --no-browser --allow-root"] diff --git a/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile index 96e79547f0..76758bd147 100644 --- a/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/tensorflow.partial.Dockerfile @@ -1,2 +1,7 @@ -ARG TF_PACKAGE +# Options: +# tensorflow +# tensorflow-gpu +# tf-nightly +# tf-nightly-gpu +ARG TF_PACKAGE=tensorflow RUN ${PIP} install ${TF_PACKAGE} diff --git a/tensorflow/tools/dockerfiles/partials/test-import.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/test-import.partial.Dockerfile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile deleted file mode 100644 index 0a50735bf8..0000000000 --- a/tensorflow/tools/dockerfiles/partials/ubuntu.partial.Dockerfile +++ /dev/null @@ -1,2 +0,0 @@ -ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} diff --git a/tensorflow/tools/dockerfiles/partials/bazel.partial.Dockerfile 
b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile similarity index 58% rename from tensorflow/tools/dockerfiles/partials/bazel.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile index b08d8bdd14..156bb01991 100644 --- a/tensorflow/tools/dockerfiles/partials/bazel.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/bazel.partial.Dockerfile @@ -6,6 +6,20 @@ RUN apt-get update && apt-get install -y \ ${PYTHON}-dev \ swig +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + keras_applications \ + keras_preprocessing \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && test "${USE_PYTHON_3_NOT_2}" -eq 1 && true || ${PIP} --no-cache-dir install \ + enum34 + # Install bazel RUN echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list && \ curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile similarity index 86% rename from tensorflow/tools/dockerfiles/partials/ubuntu-devel.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile index bc79272276..901652cc28 100644 --- a/tensorflow/tools/dockerfiles/partials/ubuntu-devel.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/cpu-devel.partial.Dockerfile @@ -1,5 +1,4 @@ -ARG UBUNTU_VERSION=16.04 -FROM ubuntu:${UBUNTU_VERSION} +FROM ubuntu:${UBUNTU_VERSION} AS base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -11,7 +10,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ - python-dev \ rsync \ software-properties-common \ unzip \ @@ -22,3 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && \ 
apt-get clean && \ rm -rf /var/lib/apt/lists/* + +ENV CI_BUILD_PYTHON python + diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/cpu.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/cpu.partial.Dockerfile new file mode 100644 index 0000000000..d01b26e27f --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/cpu.partial.Dockerfile @@ -0,0 +1 @@ +FROM ubuntu:${UBUNTU_VERSION} as base diff --git a/tensorflow/tools/dockerfiles/partials/nvidia-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile similarity index 78% rename from tensorflow/tools/dockerfiles/partials/nvidia-devel.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile index 45159f711f..48d457e40c 100644 --- a/tensorflow/tools/dockerfiles/partials/nvidia-devel.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia-devel.partial.Dockerfile @@ -1,5 +1,4 @@ -ARG UBUNTU_VERSION=16.04 -FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -22,6 +21,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + python-dev \ rsync \ software-properties-common \ unzip \ @@ -44,6 +44,14 @@ RUN mkdir /usr/local/cuda-9.0/lib && \ ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/local/cuda/lib/libnccl.so.2 && \ ln -s /usr/include/nccl.h /usr/local/cuda/include/nccl.h -# TODO(tobyboyd): Remove after license is excluded from BUILD file. -RUN gunzip /usr/share/doc/libnccl2/NCCL-SLA.txt.gz && \ - cp /usr/share/doc/libnccl2/NCCL-SLA.txt /usr/local/cuda/ +# Configure the build for our CUDA configuration. 
+ENV CI_BUILD_PYTHON python +ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH +ENV TF_NEED_CUDA 1 +ENV TF_NEED_TENSORRT 1 +ENV TF_CUDA_COMPUTE_CAPABILITIES=3.5,5.2,6.0,6.1,7.0 +ENV TF_CUDA_VERSION=9.0 +ENV TF_CUDNN_VERSION=7 + +# NCCL 2.x +ENV TF_NCCL_VERSION=2 diff --git a/tensorflow/tools/dockerfiles/partials/nvidia.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile similarity index 78% rename from tensorflow/tools/dockerfiles/partials/nvidia.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile index 1064390af3..1dc8e43aad 100644 --- a/tensorflow/tools/dockerfiles/partials/nvidia.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/nvidia.partial.Dockerfile @@ -1,6 +1,5 @@ -FROM nvidia/cuda:9.0-base-ubuntu16.04 +FROM nvidia/cuda:9.0-base-ubuntu${UBUNTU_VERSION} as base -# Pick up some TF dependencies RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ cuda-command-line-tools-9-0 \ @@ -9,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ cuda-curand-9-0 \ cuda-cusolver-9-0 \ cuda-cusparse-9-0 \ + curl \ libcudnn7=7.2.1.38-1+cuda9.0 \ libnccl2=2.2.13-1+cuda9.0 \ libfreetype6-dev \ @@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ libpng12-dev \ libzmq3-dev \ pkg-config \ + rsync \ software-properties-common \ unzip \ && \ @@ -26,3 +27,6 @@ RUN apt-get update && \ apt-get install nvinfer-runtime-trt-repo-ubuntu1604-4.0.1-ga-cuda9.0 && \ apt-get update && \ apt-get install libnvinfer4=4.1.2-1+cuda9.0 + +# For CUDA profiling, TensorFlow requires CUPTI. 
+ENV LD_LIBRARY_PATH /usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH diff --git a/tensorflow/tools/dockerfiles/partials/python.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/python.partial.Dockerfile similarity index 66% rename from tensorflow/tools/dockerfiles/partials/python.partial.Dockerfile rename to tensorflow/tools/dockerfiles/partials/ubuntu/python.partial.Dockerfile index ee08af73a8..6af4731953 100644 --- a/tensorflow/tools/dockerfiles/partials/python.partial.Dockerfile +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/python.partial.Dockerfile @@ -10,6 +10,9 @@ RUN apt-get update && apt-get install -y \ ${PYTHON} \ ${PYTHON}-pip -RUN ${PIP} install --upgrade \ +RUN ${PIP} --no-cache-dir install --upgrade \ pip \ setuptools + +# Some TF tools expect a "python" binary +RUN ln -s $(which ${PYTHON}) /usr/local/bin/python diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/test-devel.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/test-devel.partial.Dockerfile new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/tools/dockerfiles/partials/ubuntu/version.partial.Dockerfile b/tensorflow/tools/dockerfiles/partials/ubuntu/version.partial.Dockerfile new file mode 100644 index 0000000000..6ecd2b8b1a --- /dev/null +++ b/tensorflow/tools/dockerfiles/partials/ubuntu/version.partial.Dockerfile @@ -0,0 +1 @@ +ARG UBUNTU_VERSION=16.04 diff --git a/tensorflow/tools/dockerfiles/spec.yml b/tensorflow/tools/dockerfiles/spec.yml index 28bf9a55da..5049e8dcfb 100644 --- a/tensorflow/tools/dockerfiles/spec.yml +++ b/tensorflow/tools/dockerfiles/spec.yml @@ -1,195 +1,135 @@ -# ====== -# HEADER -# ====== -# -# This is commented-out and prepended to each generated Dockerfile. header: | - Copyright 2018 The TensorFlow Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ============================================================================ - - THIS IS A GENERATED DOCKERFILE. - - This file was assembled from multiple pieces, whose use is documented - below. Please refer to the the TensorFlow dockerfiles documentation for - more information. Build args are documented as their default value. - -# ======== -# PARTIALS -# ======== + # Copyright 2018 The TensorFlow Authors. All Rights Reserved. + # + # Licensed under the Apache License, Version 2.0 (the "License"); + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # ============================================================================ + # + # THIS IS A GENERATED DOCKERFILE. + # + # This file was assembled from multiple pieces, whose use is documented + # throughout. Please refer to the the TensorFlow dockerfiles documentation + # for more information. + +# A combinatorial explosion of Docker images and Dockerfiles. +# Each "release" defines all of the ways to combine related but separate chunks +# of functionality ("slices") by listing all of the "slice sets" to use when +# building. # -# Represent and document pieces of a Dockerfile. 
Spec: -# -# name: the name of the partial, is referenced from the images section -# desc: A description, inserted later into the Dockerfile -# file: Alternative file prefix, e.g. file.partial.Dockerfile. The default is -# the name of the partial. -# args: A dict of ARGs in the Dockerfile; each entry has the format -# ARG_NAME: VALUE where VALUE is one of: -# - a dict: -# desc: Documentation for the arg -# default: Default value for the arg; is written to the Dockerfile -# options: List of strings, part of documentation -# - a concrete value: the same as a dictionary with default: [value]. - -partials: - ubuntu: - desc: Start from Ubuntu (no GPU support) - args: - UBUNTU_VERSION: 16.04 - - ubuntu-devel: - desc: Start from Ubuntu, with TF development packages (no GPU support) - args: - UBUNTU_VERSION: 16.04 - - bazel: - desc: Install the latest version of Bazel and Python development tools. - - nvidia: - desc: NVIDIA with CUDA and CuDNN, no dev stuff - args: - UBUNTU_VERSION: 16.04 - - nvidia-devel: - desc: > - Start from Nvidia's Ubuntu base image with CUDA and CuDNN, with TF - development packages. - args: - UBUNTU_VERSION: 16.04 +# For example, a release that uses {nightly}{py} would create 4 Dockerfiles +# (which could become images or concrete Dockerfiles), because the "nightly" +# and "py" slice sets both have two entries: +# +# - nightly (no -py2 because the Python 2 slice set has add_to_name: "" +# - nightly-py3 +# - nightly-gpu (similar) +# - nightly-gpu-py3 + +releases: + nightly: + tag_specs: + - "{nightly}{py}{jupyter}" + + versioned: + tag_specs: + - "{_TAG_PREFIX}{ubuntu}{py}{jupyter}" + + ubuntu-dockerfiles: + is_dockerfiles: true + upload_images: false + tag_specs: + - "{ubuntu}{jupyter}" + +slice_sets: + + py: + - add_to_name: "" + args: + - USE_PYTHON_3_NOT_2= + - add_to_name: "-py3" + args: + - USE_PYTHON_3_NOT_2=1 - python: - desc: Python is required for TensorFlow and other libraries. 
- args: - USE_PYTHON_3_NOT_2: - default: true - desc: Install python 3 over Python 2 - - tensorflow: - desc: Install the TensorFlow Python package. - args: - TF_PACKAGE: - default: tensorflow - options: - - tensorflow - - tensorflow-gpu - - tf-nightly - - tf-nightly-gpu - desc: The specific TensorFlow Python package to install - shell: - desc: Configure TensorFlow's shell prompt and login tools. jupyter: - desc: Launch Jupyter on execution instead of a bash prompt. - -# ====== -# IMAGES -# ====== -# -# Represent Dockerfiles. Spec: -# -# name: the name of the image, possibly referenced by other images -# desc: A description, inserted later into the Dockerfile -# create-dockerfile: Create a dockerfile based on this. Useful for creating -# extensible base images that don't need a file. Default is true. -# partials: List of VALUEs, where a VALUE is either: -# - the name of a partial, which inserts that partial into this image -# - image: [name of another image], which inserts the partials from that -# image into this image -# arg-defaults: List of VALUEs, where a VALUE is either: -# - ARG_NAME: VALUE, which sets the ARG_NAME to VALUE wherever it appears -# in this image's partials -# - [name of another image], which loads the default args from that image -images: - - nodev: - create-dockerfile: false - partials: - - python - - tensorflow - - shell - - dev: - create-dockerfile: false - partials: - - python - - bazel - - shell - - cpu: - desc: Ubuntu-based, CPU-only environment for using TensorFlow - partials: - - ubuntu - - image: nodev - - cpu-devel: - desc: > - Ubuntu-based, CPU-only environment for developing changes for - TensorFlow. - partials: - - ubuntu-devel - - image: dev + - add_to_name: "" + - add_to_name: "-jupyter" + partials: + - jupyter - nvidia: - desc: Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow. 
- arg-defaults: - - TF_PACKAGE: tensorflow-gpu - partials: - - nvidia - - image: nodev - - nvidia-devel: - desc: > - Ubuntu-based, Nvidia-GPU-enabled environment for developing changes - for TensorFlow. - arg-defaults: - - TF_PACKAGE: tensorflow-gpu - partials: - - nvidia-devel - - image: dev - - cpu-jupyter: - desc: > - Ubuntu-based, CPU-only environment for using TensorFlow, with Jupyter - included. - partials: - - image: cpu - - jupyter - - cpu-devel-jupyter: - desc: > - Ubuntu-based, CPU-only environment for developing changes for - TensorFlow, with Jupyter included. - partials: - - image: cpu-devel - - jupyter - - nvidia-jupyter: - desc: > - Ubuntu-based, Nvidia-GPU-enabled environment for using TensorFlow, with - Jupyter included. - arg-defaults: - - nvidia - partials: - - image: nvidia - - jupyter - - nvidia-devel-jupyter: - desc: > - Ubuntu-based, Nvidia-GPU-enabled environment for developing changes for - TensorFlow, with Jupyter included. - arg-defaults: - - nvidia-devel - partials: - - image: nvidia-devel - - jupyter + ubuntu: + - add_to_name: "" + dockerfile_exclusive_name: "cpu" + partials: + - ubuntu/version + - ubuntu/cpu + - ubuntu/python + - tensorflow + - shell + - add_to_name: "-gpu" + dockerfile_exclusive_name: "gpu" + args: + - TF_PACKAGE=tensorflow-gpu + partials: + - ubuntu/version + - ubuntu/nvidia + - ubuntu/python + - tensorflow + - shell + tests: + - import-gpu.sh + test_runtime: nvidia + - add_to_name: "-devel" + dockerfile_exclusive_name: "cpu-devel" + partials: + - ubuntu/version + - ubuntu/cpu-devel + - ubuntu/python + - ubuntu/bazel + - shell + tests: + - build-cpu.sh + - add_to_name: "-gpu-devel" + dockerfile_exclusive_name: "gpu-devel" + partials: + - ubuntu/version + - ubuntu/nvidia-devel + - ubuntu/python + - ubuntu/bazel + - shell + tests: + - build-gpu.sh + test_runtime: nvidia + + nightly: + - add_to_name: "nightly" + partials: + - ubuntu/version + - ubuntu/cpu + - ubuntu/python + - tensorflow + - shell + args: + - 
TF_PACKAGE=tf-nightly + tests: + - import.sh + - add_to_name: "nightly-gpu" + partials: + - ubuntu/version + - ubuntu/nvidia + - ubuntu/python + - tensorflow + - shell + test_runtime: nvidia + tests: + - import-gpu.sh + args: + - TF_PACKAGE=tf-nightly-gpu diff --git a/tensorflow/tools/dockerfiles/tests/build-cpu.sh b/tensorflow/tools/dockerfiles/tests/build-cpu.sh new file mode 100755 index 0000000000..337239dc38 --- /dev/null +++ b/tensorflow/tools/dockerfiles/tests/build-cpu.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +# Download and build TensorFlow. +set -euxo pipefail +git clone --branch=master --depth=1 https://github.com/tensorflow/tensorflow.git /tensorflow +cd /tensorflow + +ln -s $(which ${PYTHON}) /usr/local/bin/python + +# For optimized builds appropriate for the hardware platform of your choosing, uncomment below... +# For ivy-bridge or sandy-bridge +# --copt=-march="ivybridge" \ +# for haswell, broadwell, or skylake +# --copt=-march="haswell" \ +tensorflow/tools/ci_build/builds/configured CPU \ + bazel build -c opt --copt=-mavx --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + tensorflow/tools/pip_package:build_pip_package && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ + pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ + rm -rf /tmp/pip && \ + rm -rf /root/.cache + diff --git a/tensorflow/tools/dockerfiles/tests/build-gpu.sh b/tensorflow/tools/dockerfiles/tests/build-gpu.sh new file mode 100755 index 0000000000..7a1e7ad340 --- /dev/null +++ b/tensorflow/tools/dockerfiles/tests/build-gpu.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Download and build TensorFlow. 
+set -euxo pipefail +git clone --branch=master --depth=1 https://github.com/tensorflow/tensorflow.git /tensorflow +cd /tensorflow + +ln -s $(which ${PYTHON}) /usr/local/bin/python + +ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 + +LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH} \ +tensorflow/tools/ci_build/builds/configured GPU \ +bazel build -c opt --copt=-mavx --config=cuda \ + --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \ + tensorflow/tools/pip_package:build_pip_package && \ +rm /usr/local/cuda/lib64/stubs/libcuda.so.1 && \ +bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \ +pip --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \ +rm -rf /tmp/pip && \ +rm -rf /root/.cache diff --git a/tensorflow/tools/dockerfiles/tests/import-gpu.sh b/tensorflow/tools/dockerfiles/tests/import-gpu.sh new file mode 100755 index 0000000000..214e5ccf2c --- /dev/null +++ b/tensorflow/tools/dockerfiles/tests/import-gpu.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +python -c 'import tensorflow as tf; tf.test.is_gpu_available() or exit(1)' diff --git a/tensorflow/tools/dockerfiles/tests/import.sh b/tensorflow/tools/dockerfiles/tests/import.sh new file mode 100755 index 0000000000..79998aad77 --- /dev/null +++ b/tensorflow/tools/dockerfiles/tests/import.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +set -euxo pipefail +python -c 'import tensorflow as tf' diff --git a/tensorflow/tools/dockerfiles/assembler.Dockerfile b/tensorflow/tools/dockerfiles/tools.Dockerfile similarity index 95% rename from tensorflow/tools/dockerfiles/assembler.Dockerfile rename to tensorflow/tools/dockerfiles/tools.Dockerfile index 7a8e07fced..e8929295a5 100644 --- a/tensorflow/tools/dockerfiles/assembler.Dockerfile +++ b/tensorflow/tools/dockerfiles/tools.Dockerfile @@ -20,8 +20,9 @@ FROM debian:stretch LABEL maintainer="Austin Anderson " -RUN apt-get update && apt-get install -y python3 python3-pip bash -RUN pip3 install --upgrade pip 
setuptools pyyaml absl-py cerberus +RUN apt-get update && apt-get install -y python3 python3-pip bash curl +RUN curl -sSL https://get.docker.com/ | sh +RUN pip3 install --upgrade pip setuptools pyyaml absl-py cerberus docker WORKDIR /tf VOLUME ["/tf"] -- GitLab From 814a6c24e4dfcf220a3b9ea429ad88b07fa81818 Mon Sep 17 00:00:00 2001 From: wenxizhu Date: Fri, 30 Nov 2018 09:24:23 +0800 Subject: [PATCH 0202/1078] Fix format for clang-format check. --- tensorflow/core/graph/mkl_layout_pass.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index edea296c3b..1c1a6ce652 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -2700,8 +2700,8 @@ Status MklLayoutRewritePass::FuseTransposeMklOpTranspose( for (const Edge* e : transpose_to_nchw->out_edges()) { if (!e->IsControlEdge()) { const int kTransposeWithMklOpOutputSlot = 0; - DCHECK((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, - e->dst(), e->dst_input())); + DCHECK((*g)->AddEdge(new_node, kTransposeWithMklOpOutputSlot, e->dst(), + e->dst_input())); } } -- GitLab From 33f3b46e1b209be6a64f53562fd4456352c878ee Mon Sep 17 00:00:00 2001 From: "Li, Guizi" Date: Fri, 30 Nov 2018 10:55:15 +0800 Subject: [PATCH 0203/1078] [Intel MKL] Enable MKL LeakyRelu OP --- tensorflow/core/graph/mkl_layout_pass.cc | 48 ++++++ tensorflow/core/graph/mkl_layout_pass_test.cc | 79 +++++++++ tensorflow/core/kernels/mkl_relu_op.cc | 150 ++++++++++++++---- tensorflow/core/ops/nn_ops.cc | 35 +++- 4 files changed, 282 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 69735aac02..8d7ddbd0c3 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -258,6 +258,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { csinfo_.fused_batch_norm = 
"FusedBatchNorm"; csinfo_.fused_batch_norm_grad = "FusedBatchNormGrad"; csinfo_.identity = "Identity"; + csinfo_.leakyrelu = "LeakyRelu"; + csinfo_.leakyrelu_grad = "LeakyReluGrad"; csinfo_.lrn = "LRN"; csinfo_.lrn_grad = "LRNGrad"; csinfo_.matmul = "MatMul"; @@ -381,6 +383,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass { rinfo_.push_back({csinfo_.lrn_grad, mkl_op_registry::GetMklOpName(csinfo_.lrn_grad), CopyAttrsLRN, LrnGradRewrite}); + rinfo_.push_back({csinfo_.leakyrelu, + mkl_op_registry::GetMklOpName(csinfo_.leakyrelu), + CopyAttrsLeakyRelu, LeakyReluRewrite}); + rinfo_.push_back({csinfo_.leakyrelu_grad, + mkl_op_registry::GetMklOpName(csinfo_.leakyrelu_grad), + CopyAttrsLeakyRelu, LeakyReluRewrite}); rinfo_.push_back({csinfo_.max_pool, mkl_op_registry::GetMklOpName(csinfo_.max_pool), CopyAttrsPooling, NonDepthBatchWisePoolRewrite}); @@ -584,6 +592,8 @@ class MklLayoutRewritePass : public GraphOptimizationPass { string fused_batch_norm; string fused_batch_norm_grad; string identity; + string leakyrelu; + string leakyrelu_grad; string lrn; string lrn_grad; string matmul; @@ -891,6 +901,29 @@ class MklLayoutRewritePass : public GraphOptimizationPass { return do_rewrite; } + // To compute LeakyRelu MKL DNN uses (feature), if feature > 0 + // otherwise it uses (feature * alpha) + // while Tensorflow uses max(feature, feature * alpha) to compute LeakyRelu. + // These two algorithm are not consistent when alpha > 1 + // so only LeakyRelu is written to MKL OP when alpha < 1 + static bool LeakyReluRewrite(const Node* n) { + CHECK_NOTNULL(n); + + float alpha; + CHECK_EQ(GetNodeAttr(n->def(), "alpha", &alpha).ok(), true); + + // If the alpha of LeakyRelu is less than 1, rewrite the node. + // Otherwise eigen node is used instead. 
+ if (alpha < 1) { + return true; + } + VLOG(1) << "LeakyReluRewrite: The model sets alpha is not less than 1 " + << "which case is not optimized by Intel MKL, thus using Eigen op" + << "for LeakyRelu "; + + return false; + } + static bool MaxpoolGradRewrite(const Node* n) { CHECK_NOTNULL(n); bool do_rewrite = false; @@ -1078,6 +1111,7 @@ class MklLayoutRewritePass : public GraphOptimizationPass { static void CopyAttrsDataType(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsFusedBatchNorm(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsLRN(const Node* orig_node, NodeBuilder* nb); + static void CopyAttrsLeakyRelu(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsQuantizedPooling(const Node* orig_node, NodeBuilder* nb); static void CopyAttrsQuantizedConv2D(const Node* orig_node, NodeBuilder* nb); @@ -1663,6 +1697,20 @@ void MklLayoutRewritePass::CopyAttrsLRN(const Node* orig_node, nb->Attr("beta", beta); } +void MklLayoutRewritePass::CopyAttrsLeakyRelu(const Node* orig_node, + NodeBuilder* nb) { + DataType T; + float alpha; + + // Get all attributes from old node. + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "alpha", &alpha)); + + // Add attributes to new node. 
+ nb->Attr("T", T); + nb->Attr("alpha", alpha); +} + void MklLayoutRewritePass::CopyAttrsPooling(const Node* orig_node, NodeBuilder* nb) { DataType T; diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index 7e2d1f7878..f815838a89 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -960,6 +960,85 @@ TEST_F(MklLayoutPassTest, NodeRewrite_Relu6Relu6Grad_Positive) { "DMT/_1->C:2"); } +TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Positive) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'LeakyRelu'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 0.1 } }" + " input: ['A'] }" + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'B'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(_MklLeakyRelu);C(Zeta);DMT/_0(Const)|A->B;A->C;" + "A:control->DMT/_0:control;B->C:1;DMT/_0->B:1"); +} + +TEST_F(MklLayoutPassTest, NodeRewrite_LeakyRelu_Negative) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'LeakyRelu'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 2.0 } }" + " input: ['A'] }" + "node { name: 'C' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'B'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(LeakyRelu);C(Zeta)|A->B;A->C;B->C:1"); +} + +TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Positive) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Input'}" + "node { name: 'C' op: 'LeakyReluGrad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 0.1 } }" + " input: ['A', 'B'] }" + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'C'] }"); + EXPECT_EQ(DoMklLayoutOptimizationPass(), + "A(Input);B(Input);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" + 
"DMT/_1(Const)|A->C;A->D;A:control->DMT/_0:control;" + "A:control->DMT/_1:control;B->C:1;C->D:1;DMT/_0->C:2;DMT/_1->C:3"); +} + +TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluGrad_Negative) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'Input'}" + "node { name: 'C' op: 'LeakyReluGrad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 2.0 } }" + " input: ['A', 'B'] }" + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'C'] }"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);B(Input);C(LeakyReluGrad);D(Zeta)|A->C;A->D;B->C:1;C->D:1"); +} + +TEST_F(MklLayoutPassTest, NodeRewrite_LeakyReluLeakyReluGrad_Positive) { + InitGraph( + "node { name: 'A' op: 'Input'}" + "node { name: 'B' op: 'LeakyRelu'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 0.1 } }" + " input: ['A'] }" + "node { name: 'C' op: 'LeakyReluGrad'" + " attr { key: 'T' value { type: DT_FLOAT } }" + " attr { key: 'alpha' value { f: 0.1 } }" + " input: ['A', 'B'] }" + "node { name: 'D' op: 'Zeta' attr { key: 'T' value { type: DT_FLOAT } }" + " input: ['A', 'C'] }"); + EXPECT_EQ( + DoMklLayoutOptimizationPass(), + "A(Input);B(_MklLeakyRelu);C(_MklLeakyReluGrad);D(Zeta);DMT/_0(Const);" + "DMT/_1(Const)|A->B;A->C;A->D;A:control->DMT/_0:control;" + "A:control->DMT/_1:control;B->C:1;B:1->C:3;C->D:1;DMT/_0->B:1;" + "DMT/_1->C:2"); +} + TEST_F(MklLayoutPassTest, NodeRewrite_AvgPool_Positive) { InitGraph( "node { name: 'A' op: 'Input'}" diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 708213648b..2e29eae41b 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -16,12 +16,12 @@ limitations under the License. // See docs in ../ops/nn_ops.cc. 
#ifdef INTEL_MKL -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/errors.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #ifndef INTEL_MKL_ML_ONLY #include "mkldnn.hpp" @@ -204,7 +204,7 @@ class MklEltwiseFwdPrimitiveFactory : public MklPrimitiveFactory { ~MklEltwiseFwdPrimitiveFactory() {} static string CreateKey(const MklEltwiseFwdParams& fwdParams, - memory::format src_fmt) { + memory::format src_fmt) { string prefix = "eltwise_fwd"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -422,8 +422,8 @@ class MklEltwiseBwdPrimitiveFactory : public MklPrimitiveFactory { private: static string CreateKey(const MklEltwiseBwdParams& bwdParams, - const memory::format& src_fmt, - const memory::format& diff_dst_fmt) { + const memory::format& src_fmt, + const memory::format& diff_dst_fmt) { string prefix = "eltwise_bwd"; FactoryKeyCreator key_creator; key_creator.AddAsKey(prefix); @@ -856,9 +856,9 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {static_cast(src_index)}, - static_cast(dst_index), - tf_shape_dst, &dst_tensor)); + {static_cast(src_index)}, + static_cast(dst_index), + tf_shape_dst, &dst_tensor)); AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); T* dst_data = dst_tensor->flat().data(); @@ -866,19 +866,20 @@ class MklReluOpBase : public OpKernel { // execute eltwise eltwise_fwd->Execute(src_data, dst_data); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + - ", in file " + string(__FILE__) + ":" + - std::to_string(__LINE__); - OP_REQUIRES_OK(context, - errors::Aborted("Operation received an exception:", - 
error_msg)); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); + OP_REQUIRES_OK( + context, + errors::Aborted("Operation received an exception:", error_msg)); } } private: engine cpu_engine = engine(engine::cpu, 0); std::shared_ptr relu_fwd_pd; + + protected: float alpha_; float beta_; }; @@ -947,11 +948,11 @@ class MklReluGradOpBase : public OpKernel { auto diff_dst_tf_data_format = MklDnnDataFormatToTFDataFormat(diff_dst_mkl_data_format); - src_dims = (src_tensor.dims() == 4) - ? TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), - diff_dst_tf_data_format) - : TFShapeToMklDnnDimsInNCDHW(src_tensor.shape(), - diff_dst_tf_data_format); + src_dims = (src_tensor.dims() == 4) + ? TFShapeToMklDnnDimsInNCHW(src_tensor.shape(), + diff_dst_tf_data_format) + : TFShapeToMklDnnDimsInNCDHW(src_tensor.shape(), + diff_dst_tf_data_format); src_md = memory::desc(src_dims, MklDnnType(), diff_dst_mkl_data_format); } else { @@ -1001,8 +1002,7 @@ class MklReluGradOpBase : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor() || - dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { auto diff_src_pd = eltwise_bwd_pd->diff_src_primitive_desc(); dnn_shape_diff_src.SetMklTensor(true); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); @@ -1012,9 +1012,10 @@ class MklReluGradOpBase : public OpKernel { dnn_shape_src.GetSizesAsMklDnnDims(), dnn_shape_src.GetTfDataFormat()); } else { - dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), - dnn_shape_diff_dst.GetSizesAsMklDnnDims(), - dnn_shape_diff_dst.GetTfDataFormat()); + dnn_shape_diff_src.SetTfLayout( + dnn_shape_diff_dst.GetDimension(), + dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / 
sizeof(T)); } else { @@ -1033,9 +1034,9 @@ class MklReluGradOpBase : public OpKernel { // execute eltwise bwd eltwise_bwd->Execute(src_data, diff_dst_data, diff_src_data); } catch (mkldnn::error& e) { - string error_msg = "Status: " + std::to_string(e.status) + - ", message: " + string(e.message) + ", in file " + - string(__FILE__) + ":" + std::to_string(__LINE__); + string error_msg = "Status: " + std::to_string(e.status) + ", message: " + + string(e.message) + ", in file " + string(__FILE__) + + ":" + std::to_string(__LINE__); OP_REQUIRES_OK( context, errors::Aborted("Operation received an exception:", error_msg)); @@ -1045,6 +1046,8 @@ class MklReluGradOpBase : public OpKernel { private: engine cpu_engine = engine(engine::cpu, 0); std::shared_ptr relu_fwd_pd; + + protected: float alpha_; float beta_; }; @@ -1312,8 +1315,84 @@ class MklRelu6GradOp T* out_o = diff_src_tensor->flat().data(); T* user_i = const_cast(src_tensor.flat().data()); T* user_g = const_cast(diff_dst_tensor.flat().data()); - out_o[0] = user_g[0] * user_i[0] > 0 && - (user_i[0] < static_cast(RELU6_UPPER_BOUND)); + out_o[0] = user_g[0] * (user_i[0] > 0 && + (user_i[0] < static_cast(RELU6_UPPER_BOUND))); + return; + } +}; + +template +class MklLeakyReluOp : public MklReluOpBase { + public: + ~MklLeakyReluOp() {} + + explicit MklLeakyReluOp(OpKernelConstruction* context) + : MklReluOpBase(context, 0.0f, 0.0f) { + float alpha; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha)); + OP_REQUIRES(context, alpha < 1, + errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. 
" + "alpha is: ", + alpha)); + + this->alpha_ = alpha; + } + + virtual void Compute_Scalar(OpKernelContext* context) { + const size_t src_index = 0; // index of src input tensor + const size_t dst_index = 0; // index of dst output tensor + const Tensor& src_tensor = MklGetInput(context, src_index); + MklDnnShape dnn_shape_src; + GetMklShape(context, src_index, &dnn_shape_src); + + Tensor* dst_tensor = nullptr; + T* user_i = const_cast(src_tensor.flat().data()); + MklDnnShape dnn_shape_dst; + dnn_shape_dst.SetMklTensor(false); + AllocateOutputSetMklShape(context, dst_index, &dst_tensor, + src_tensor.shape(), dnn_shape_dst); + T* out_o = dst_tensor->flat().data(); + out_o[0] = std::max(user_i[0], user_i[0] * this->alpha_); + return; + } +}; + +template +class MklLeakyReluGradOp : public MklReluGradOpBase { + public: + ~MklLeakyReluGradOp() {} + + explicit MklLeakyReluGradOp(OpKernelConstruction* context) + : MklReluGradOpBase(context, 0.0f, 0.0f) { + float alpha; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha)); + OP_REQUIRES(context, alpha < 1, + errors::InvalidArgument("MKL LeakyRelu only support alpha < 1. 
" + "alpha is: ", + alpha)); + + this->alpha_ = alpha; + } + + virtual void Compute_Scalar(OpKernelContext* context) { + const size_t diff_dst_index = 0; // index of diff_dst input tensor + const size_t src_index = 1; // index of src input tensor + const size_t diff_src_index = 0; // index of diff_src output tensor + const Tensor& src_tensor = MklGetInput(context, src_index); + const Tensor& diff_dst_tensor = MklGetInput(context, diff_dst_index); + Tensor* diff_src_tensor = nullptr; + + MklDnnShape dnn_shape_diff_dst; + GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst); + + MklDnnShape dnn_shape_diff_src; + dnn_shape_diff_src.SetMklTensor(false); + AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, + diff_dst_tensor.shape(), dnn_shape_diff_src); + T* out_o = diff_src_tensor->flat().data(); + T* user_i = const_cast(src_tensor.flat().data()); + T* user_g = const_cast(diff_dst_tensor.flat().data()); + out_o[0] = user_i[0] > 0 ? user_g[0] : user_g[0] * this->alpha_; return; } }; @@ -1376,6 +1455,19 @@ TF_CALL_float(REGISTER_TANH_MKL_SUPPORTED_KERNELS_TYPES); MklRelu6GradOp); TF_CALL_float(REGISTER_RELU6_MKL_SUPPORTED_KERNELS_TYPES); +#define REGISTER_LeakyRelu_MKL_SUPPORTED_KERNELS_TYPES(type) \ + REGISTER_KERNEL_BUILDER(Name("_MklLeakyRelu") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklLeakyReluOp); \ + REGISTER_KERNEL_BUILDER(Name("_MklLeakyReluGrad") \ + .Device(DEVICE_CPU) \ + .TypeConstraint("T") \ + .Label(mkl_op_registry::kMklOpLabel), \ + MklLeakyReluGradOp); +TF_CALL_float(REGISTER_LeakyRelu_MKL_SUPPORTED_KERNELS_TYPES); + #endif } // namespace tensorflow diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index efa84d6c22..ea26f7d2be 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1915,6 +1915,40 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. 
)doc"); +REGISTER_OP("_MklLeakyRelu") + .Input("features: T") + .Input("mkl_features: uint8") + .Output("activations: T") + .Output("mkl_activations: uint8") + .Attr("T: {half, float, double} = DT_FLOAT") + .Attr("alpha: float = 0.2") + .SetShapeFn(shape_inference::UnchangedShape) + .Doc(R"doc( +MKL version of LeakyRelu operator. Uses MKL DNN APIs to implement +LeakyRelu operator. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + +REGISTER_OP("_MklLeakyReluGrad") + .Input("gradients: T") + .Input("features: T") + .Input("mkl_gradients: uint8") + .Input("mkl_features: uint8") + .Output("backprops: T") + .Output("mkl_backprops: uint8") + .Attr("T: {half, float, double} = DT_FLOAT") + .Attr("alpha: float = 0.2") + .SetShapeFn(shape_inference::MergeBothInputsShapeFn) + .Doc(R"doc( +MKL version of LeakyReluGrad operator. Uses MKL DNN APIs to compute rectified +linear gradients for LeakyReluGrad operation. + +NOTE Do not invoke this operator directly in Python. Graph rewrite pass is +expected to invoke these operators. +)doc"); + REGISTER_OP("_MklElu") .Input("features: T") .Input("mkl_features: uint8") @@ -2110,7 +2144,6 @@ NOTE Do not invoke this operator directly in Python. Graph rewrite pass is expected to invoke these operators. )doc"); - REGISTER_OP("_MklAvgPool3DGrad") .Input("orig_input_shape: int32") .Input("grad: T") -- GitLab From f32d071589507c755f524f9d94ea4ee4174c9498 Mon Sep 17 00:00:00 2001 From: Pan Daoxin Date: Fri, 30 Nov 2018 11:09:48 +0800 Subject: [PATCH 0204/1078] Enable reorder cache for MklSlice. 
--- tensorflow/core/kernels/mkl_slice_op.cc | 201 +++++++++++++++++++----- 1 file changed, 165 insertions(+), 36 deletions(-) diff --git a/tensorflow/core/kernels/mkl_slice_op.cc b/tensorflow/core/kernels/mkl_slice_op.cc index 85cabeb92b..f32a6003af 100644 --- a/tensorflow/core/kernels/mkl_slice_op.cc +++ b/tensorflow/core/kernels/mkl_slice_op.cc @@ -60,8 +60,10 @@ typedef Eigen::ThreadPoolDevice CPUDevice; // A version of SharedValidation (slice_op.h) written for input that is in // either Mkl layout or Tensorflow layout. -// A shared code to validate input shapes and check for identity, which is not dependent on the type of T. -// We do this to reduce code size by not duplicating all this for all T (float, double, int32, etc.) +// A shared code to validate input shapes and check for identity, which is not +// dependent on the type of T. +// We do this to reduce code size by not duplicating all this for all T (float, +// double, int32, etc.) static void ValidateMklInputs(OpKernelContext* context, bool* is_identity, gtl::InlinedVector* begin, gtl::InlinedVector* size) { @@ -157,13 +159,149 @@ static void CheckCommonCasesForMklInputs(OpKernelContext* context, } } +// This structure aggregates multiple inputs to Slice methods. +// Parameters from & to represents memory pointing to reorder. +// Parameters begin_dims & size_dims represents offset and length +// passed to view primitive. +struct MklSliceParams { + const memory* from; + const memory* to; + memory::dims begin_dims; + memory::dims size_dims; + + MklSliceParams(const memory* from, const memory* to, memory::dims begin_dims, + memory::dims size_dims) + : from(from), to(to), begin_dims(begin_dims), size_dims(size_dims) {} +}; + +// This implements the reuse interface of Slice reorders. 
+template +class MklSlicePrimitive : public MklPrimitive { + public: + explicit MklSlicePrimitive(const MklSliceParams& sliceParams) { + context_.slice_stream.reset(new stream(stream::kind::eager)); + Setup(sliceParams); + } + + ~MklSlicePrimitive() {} + + void Execute(const MklSliceParams& sliceParams) { + context_.src_mem->set_data_handle(sliceParams.from->get_data_handle()); + context_.dst_mem->set_data_handle(sliceParams.to->get_data_handle()); + context_.slice_stream->submit(context_.slice_primitives); + + context_.src_mem->set_data_handle(DummyData); + context_.dst_mem->set_data_handle(DummyData); + return; + } + + std::shared_ptr GetPrimitive() { return context_.reorder_prim; } + + private: + struct SliceContext { + std::shared_ptr src_mem; + std::shared_ptr dst_mem; + std::shared_ptr reorder_prim; + std::shared_ptr reorder_pd; + std::shared_ptr view_pd; + std::shared_ptr slice_stream; + std::vector slice_primitives; + SliceContext() + : src_mem(nullptr), dst_mem(nullptr), reorder_prim(nullptr) {} + } context_; + + engine cpu_engine_ = engine(engine::cpu, 0); + + void Setup(const MklSliceParams& sliceParams) { + context_.src_mem.reset( + new memory({sliceParams.from->get_primitive_desc().desc(), cpu_engine_}, + DummyData)); + context_.dst_mem.reset(new memory( + {sliceParams.to->get_primitive_desc().desc(), cpu_engine_}, DummyData)); + auto src_pd = context_.src_mem->get_primitive_desc(); + auto dst_pd = context_.dst_mem->get_primitive_desc(); + context_.view_pd = + std::make_shared(view::primitive_desc( + src_pd, sliceParams.size_dims, sliceParams.begin_dims)); + context_.reorder_pd = + std::make_shared(reorder::primitive_desc( + context_.view_pd->dst_primitive_desc(), dst_pd)); + context_.reorder_prim = std::make_shared( + reorder(*context_.reorder_pd, *context_.src_mem, *context_.dst_mem)); + context_.slice_primitives.push_back(*context_.reorder_prim); + } +}; + +template +class MklSlicePrimitiveFactory : public MklPrimitiveFactory { + public: + static 
MklSlicePrimitive* Get(const MklSliceParams& sliceParams) { + auto reorderPrim = static_cast*>( + MklSlicePrimitiveFactory::GetInstance().GetReorder(sliceParams)); + if (reorderPrim == nullptr) { + reorderPrim = new MklSlicePrimitive(sliceParams); + MklSlicePrimitiveFactory::GetInstance().SetReorder(sliceParams, + reorderPrim); + } + return reorderPrim; + } + + static MklSlicePrimitiveFactory& GetInstance() { + static MklSlicePrimitiveFactory instance_; + return instance_; + } + + private: + MklSlicePrimitiveFactory() {} + ~MklSlicePrimitiveFactory() {} + + static string CreateKey(const MklSliceParams& sliceParams) { + string prefix = "reorder"; + FactoryKeyCreator key_creator; + auto const& from_desc = sliceParams.from->get_primitive_desc().desc().data; + auto const& to_desc = sliceParams.to->get_primitive_desc().desc().data; + const int KIdxFirstStride = 0; + memory::dims from_dims(from_desc.dims, &from_desc.dims[from_desc.ndims]); + memory::dims to_dims(to_desc.dims, &to_desc.dims[to_desc.ndims]); + memory::dims from_strides( + from_desc.layout_desc.blocking.strides[KIdxFirstStride], + &from_desc.layout_desc.blocking.strides[KIdxFirstStride] + [from_desc.ndims]); + memory::dims to_strides( + to_desc.layout_desc.blocking.strides[KIdxFirstStride], + &to_desc.layout_desc.blocking.strides[KIdxFirstStride][to_desc.ndims]); + key_creator.AddAsKey(prefix); + key_creator.AddAsKey(static_cast(from_desc.format)); + key_creator.AddAsKey(static_cast(from_desc.data_type)); + key_creator.AddAsKey(from_dims); + key_creator.AddAsKey(from_strides); + key_creator.AddAsKey(static_cast(to_desc.format)); + key_creator.AddAsKey(static_cast(to_desc.data_type)); + key_creator.AddAsKey(to_dims); + key_creator.AddAsKey(to_strides); + key_creator.AddAsKey(sliceParams.begin_dims); + key_creator.AddAsKey(sliceParams.size_dims); + return key_creator.GetKey(); + } + + MklPrimitive* GetReorder(const MklSliceParams& sliceParams) { + string key = CreateKey(sliceParams); + return 
this->GetOp(key); + } + + void SetReorder(const MklSliceParams& sliceParams, MklPrimitive* op) { + string key = CreateKey(sliceParams); + this->SetOp(key, op); + } +}; + // MKL-DNN implementation of Slice template -class MklDnnSliceOp : public OpKernel { +class MklSliceOp : public OpKernel { public: - explicit MklDnnSliceOp(OpKernelConstruction* context) : OpKernel(context) {} + explicit MklSliceOp(OpKernelConstruction* context) : OpKernel(context) {} - ~MklDnnSliceOp() {} + ~MklSliceOp() {} void Compute(OpKernelContext* context) override { gtl::InlinedVector begin; @@ -179,17 +317,17 @@ class MklDnnSliceOp : public OpKernel { if (begin.size() >= 8) { OP_REQUIRES( context, false, - errors::Unimplemented("MklDnnSliceOp : Unhandled input dimensions")); + errors::Unimplemented("MklSliceOp : Unhandled input dimensions")); } - ComputeMklDnnSlice(context, begin, size); + ComputeMklSlice(context, begin, size); } private: // Slice op implemented using MKL-DNN APIs. - void ComputeMklDnnSlice(OpKernelContext* context, - const gtl::InlinedVector& begin, - const gtl::InlinedVector& size) { + void ComputeMklSlice(OpKernelContext* context, + const gtl::InlinedVector& begin, + const gtl::InlinedVector& size) { try { // MKL-DNN API usage below is guided by description at: // https://github.com/01org/mkl-dnn/issues/69 @@ -200,16 +338,15 @@ class MklDnnSliceOp : public OpKernel { // probably change the format). Then your steps are: // // 1. create memory primitive descriptor in_mem_pd and memory primitive - // in_mem_p for the entire source data. - // 2. create view primitive descriptor in_submem_pd based on in_mem_pd, - // initial offsets, and sub-sizes - // 3. create memory primitive descriptor out_mem_pd and memory primitive + // in_mem_p for the entire source data. create view primitive + // descriptor + // in_submem_pd based on in_mem_pd, initial offsets, and sub-sizes + // 2. 
create memory primitive descriptor out_mem_pd and memory primitive // out_mem_p for the output (the logical sizes should match sub-sizes - // used in step 2, but the format might be arbitrary) - // 4. create reorder primitive descriptor reorder_pd based on in_submem_pd - // and out_mem_pd - // 5. create reorder primitive itself based on reorder_pd, in_mem_p, and - // out_mem_p. + // used in step 1, but the format might be arbitrary) + // 3. create reorder primitive descriptor reorder_pd based on in_submem_pd + // and out_mem_pd. create reorder primitive itself based on reorder_pd, + // in_mem_p, and out_mem_p. // // Please notice that there is no view primitive. There is only view // primitive descriptor. And the reorder uses source memory as input but @@ -268,32 +405,24 @@ class MklDnnSliceOp : public OpKernel { src.SetUsrMem(input_md, &input_tensor); } - // Step 2 - create view primitive descriptor - auto view_pd = - view::primitive_desc(src.GetUsrMemPrimDesc(), size_dims, begin_dims) - .dst_primitive_desc(); + // Step 2 - Create memory for output. auto output_strides = CalculateTFStrides(size_dims); auto output_md = MklDnnData::CreateBlockedMemDesc(size_dims, output_strides); auto output_pd = memory::primitive_desc(output_md, cpu_engine); - - // Step 3 - Create memory for output. If input is in MklDnn layout, then - // output is also in MklDnn layout. Otherwise, output is in Tensorflow - // layout. AllocateOutputTensor(context, input_mkl_shape, &output_pd, size_dims, &output_tensor, &output_mkl_shape); DCHECK(output_tensor); DCHECK_EQ(input_mkl_shape.IsMklTensor(), output_mkl_shape.IsMklTensor()); output.SetUsrMem(output_md, output_tensor); - std::vector net; - // Step 4 - create reorder primitive desc between view_pd and output_pd. - auto reorder_pd = - reorder::primitive_desc(view_pd, output.GetUsrMemPrimDesc()); - // Step 5 - create reorder primitive itself. 
- net.push_back(reorder(reorder_pd, *src.GetUsrMem(), *output.GetUsrMem())); - // Execute the reorder primitive. - stream(stream::kind::eager).submit(net).wait(); + // Step 3 - create reorder primitive. + MklSliceParams sliceParams(src.GetUsrMem(), output.GetUsrMem(), + begin_dims, size_dims); + MklSlicePrimitive* reorder_prim = + MklSlicePrimitiveFactory::Get(sliceParams); + // Execute slice reorder. + reorder_prim->Execute(sliceParams); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + ", message: " + string(e.message) + ", in file " + string(__FILE__) + @@ -347,7 +476,7 @@ class MklDnnSliceOp : public OpKernel { .HostMemory("begin") \ .HostMemory("size") \ .Label(mkl_op_registry::kMklOpLabel), \ - MklDnnSliceOp); + MklSliceOp); TF_CALL_float(REGISTER_MKL_SLICE); #undef REGISTER_MKL_SLICE -- GitLab From 7d931d7b85f65b6145643fdc638aabbce779ab21 Mon Sep 17 00:00:00 2001 From: Neargye Date: Mon, 23 Apr 2018 00:12:31 +0500 Subject: [PATCH 0205/1078] change toolchain to clang --- tensorflow/contrib/android/cmake/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/android/cmake/build.gradle b/tensorflow/contrib/android/cmake/build.gradle index 17a57b99fd..9e7fd317f3 100644 --- a/tensorflow/contrib/android/cmake/build.gradle +++ b/tensorflow/contrib/android/cmake/build.gradle @@ -22,8 +22,8 @@ android { } externalNativeBuild { cmake { - arguments '-DANDROID_TOOLCHAIN=gcc', - '-DANDROID_STL=gnustl_static' + arguments '-DANDROID_TOOLCHAIN=clang', + '-DANDROID_STL=c++_static' } } } -- GitLab From f56a6058a44edb2dc6172a0723fb92fd63e7a36f Mon Sep 17 00:00:00 2001 From: neargye Date: Fri, 30 Nov 2018 16:57:33 +0500 Subject: [PATCH 0206/1078] clean-up --- tensorflow/contrib/android/cmake/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/android/cmake/build.gradle b/tensorflow/contrib/android/cmake/build.gradle index 9e7fd317f3..ddec08894f 
100644 --- a/tensorflow/contrib/android/cmake/build.gradle +++ b/tensorflow/contrib/android/cmake/build.gradle @@ -70,7 +70,7 @@ if (ndkDir == null || ndkDir == "") { ndkDir = System.getenv('ANDROID_NDK_HOME') } -if(! Os.isFamily(Os.FAMILY_WINDOWS)) { +if (!Os.isFamily(Os.FAMILY_WINDOWS)) { // This script is for non-Windows OS. For Windows OS, MANUALLY build // (or copy the built) libs/headers to the // ${TENSORFLOW_ROOT_DIR}/tensorflow/contrib/makefile/gen -- GitLab From e318a39eae23b4699f923f08958c774611454236 Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Fri, 30 Nov 2018 08:12:30 -0800 Subject: [PATCH 0207/1078] Implement TensorListConcat and TensorListSplit. Remove GPU kernel registrations for TensorListSetItem and TensorListGetItem for string element_type since that triggers a non-DMA-copy of string which is not supported. PiperOrigin-RevId: 223517409 --- .../base_api/api_def_TensorListConcat.pbtxt | 12 + .../base_api/api_def_TensorListSplit.pbtxt | 13 + .../python_api/api_def_TensorListConcat.pbtxt | 4 + .../python_api/api_def_TensorListSplit.pbtxt | 4 + tensorflow/core/kernels/list_kernels.cc | 44 ++- tensorflow/core/kernels/list_kernels.cu.cc | 15 +- tensorflow/core/kernels/list_kernels.h | 310 +++++++++++++++--- tensorflow/core/ops/list_ops.cc | 249 +++++++++----- .../python/kernel_tests/list_ops_test.py | 211 ++++++++++++ .../kernel_tests/tensor_array_ops_test.py | 61 ++-- tensorflow/python/ops/list_ops.py | 36 +- tensorflow/python/ops/tensor_array_ops.py | 31 +- 12 files changed, 814 insertions(+), 176 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorListConcat.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_TensorListSplit.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListConcat.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorListSplit.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_TensorListConcat.pbtxt 
b/tensorflow/core/api_def/base_api/api_def_TensorListConcat.pbtxt new file mode 100644 index 0000000000..909c09aa12 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_TensorListConcat.pbtxt @@ -0,0 +1,12 @@ +op { + graph_op_name: "TensorListConcat" + summary: "Concats all tensors in the list along the 0th dimension." + description: <