From 071e6175dcc130b4c623e849a380d6434289eb66 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:00 +0200 Subject: [PATCH 001/570] Added the -Thost=x64 flag to cmake build instructions --- tensorflow/contrib/cmake/README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 0b79f718d4..5c203b777c 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -106,17 +106,6 @@ Step-by-step Windows build 1. Install the prerequisites detailed above, and set up your environment. - * The following commands assume that you are using the Windows Command - Prompt (`cmd.exe`). You will need to set up your environment to use the - appropriate toolchain, i.e. the 64-bit tools. (Some of the binary targets - we will build are too large for the 32-bit tools, and they will fail with - out-of-memory errors.) The typical command to do set up your - environment is: - - ``` - D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" - ``` - * When building with GPU support after installing the CUDNN zip file from NVidia, append its bin directory to your PATH environment variable. In case TensorFlow fails to find the CUDA dll's during initialization, check your PATH environment variable. @@ -168,7 +157,7 @@ Step-by-step Windows build and must be the last character on each line. ``` - D:\...\build> cmake .. -A x64 -DCMAKE_BUILD_TYPE=Release ^ + D:\...\build> cmake .. -A x64 -Thost=x64 -DCMAKE_BUILD_TYPE=Release ^ More? -DSWIG_EXECUTABLE=C:/tools/swigwin-3.0.10/swig.exe ^ More? -DPYTHON_EXECUTABLE=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/python.exe ^ More? 
-DPYTHON_LIBRARIES=C:/Users/%USERNAME%/AppData/Local/Continuum/Anaconda3/libs/python35.lib @@ -197,6 +186,10 @@ Step-by-step Windows build not currently supported, because it relies on a `Debug` library for Python (`python35d.lib`) that is not distributed by default. + The `-Thost=x64` flag will ensure that the 64-bit compiler and linker + are used when building. Without this flag, MSBuild will use the 32-bit + toolchain, which is prone to compile errors such as "compiler out of heap space". + There are various options that can be specified when generating the solution and project files: @@ -263,6 +256,11 @@ Step-by-step Windows build 4. Invoke MSBuild to build TensorFlow. + Set up the path to find MSBuild: + ``` + D:\temp> "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\amd64\vcvarsall.bat" + ``` + To build the C++ example program, which will be created as a `.exe` executable in the subdirectory `.\Release`: -- GitLab From 6890731b2693f6b71dedaca6b2eaf8b488226836 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 24 May 2018 15:47:22 +0200 Subject: [PATCH 002/570] increase minimum cmake version required to 3.8 --- tensorflow/contrib/cmake/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 0708d6b7b9..225c5e6227 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,5 +1,9 @@ # Minimum CMake required -cmake_minimum_required(VERSION 3.5) +if(WIN32) + cmake_minimum_required(VERSION 3.8) +else() + cmake_minimum_required(VERSION 3.5) +endif() # Project project(tensorflow C CXX) -- GitLab From 2e436951bb63a0294848b6f6d3746e449a305ad1 Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Tue, 17 Jul 2018 22:37:19 +0300 Subject: [PATCH 003/570] version_info.cc generated only once version_info.cc in the cmake files is generated every time when we build tensorflow and this forces rebuild of 
the whole project, since it is in the core library. added make.bat for windows, which does the same as make.sh to be executed easily from a build machine. the default now is visual studio 17 --- tensorflow/contrib/cmake/make.bat | 38 +++++++++++++++++++ .../contrib/cmake/tf_core_framework.cmake | 23 +++++++---- 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 tensorflow/contrib/cmake/make.bat diff --git a/tensorflow/contrib/cmake/make.bat b/tensorflow/contrib/cmake/make.bat new file mode 100644 index 0000000000..d52b24e01d --- /dev/null +++ b/tensorflow/contrib/cmake/make.bat @@ -0,0 +1,38 @@ +%echo off + +cd /d %~dp0 + +if exist _build rd /s /q _build + +mkdir _build +chdir _build + + +rem cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +CALL :NORMALIZEPATH "..\..\..\.." +SET SOURCE_DIR=%RETVAL% + +echo %SOURCE_DIR% + +SET SOURCE_DIR=F:\frameworks\tensorflow\ + +CALL :NORMALIZEPATH "../../../tools/git/gen_git_source.py" +SET SOURCE_PYTHON_SCRIPT=%RETVAL% + +CALL :NORMALIZEPATH "../../../core/util/version_info.cc" +SET SOURCE_VERSION_CC=%RETVAL% + +python %SOURCE_PYTHON_SCRIPT% --raw_generate %SOURCE_VERSION_CC% --source_dir %SOURCE_DIR% --git_tag_override= + +cmake ../ -G "Visual Studio 15 Win64" -DCMAKE_GENERATOR_TOOLSET=v141,host=x64 -DCMAKE_INSTALL_PREFIX:PATH=.\install + +EXIT /B + +:NORMALIZEPATH + SET RETVAL=%~dpfn1 + EXIT /B + + + + \ No newline at end of file diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 067c299a71..7e806685b8 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -258,14 +258,21 @@ add_dependencies(tf_core_lib ${tensorflow_EXTERNAL_DEPENDENCIES} tf_protos_cc) # force_rebuild always runs forcing ${VERSION_INFO_CC} target to run # ${VERSION_INFO_CC} would cache, but it depends on a phony never produced # target. 
-set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) -add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) -add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) -add_custom_command(OUTPUT - ${VERSION_INFO_CC} - COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py - ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} - DEPENDS __force_rebuild) +# This code forces rebuild every time, not needed as version from git is fetched only once +# move to make.bat which mimicks make.sh + +if (NOT WIN32) + + set(VERSION_INFO_CC ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) + add_custom_target(force_rebuild_target ALL DEPENDS ${VERSION_INFO_CC}) + add_custom_command(OUTPUT __force_rebuild COMMAND ${CMAKE_COMMAND} -E echo) + add_custom_command(OUTPUT + ${VERSION_INFO_CC} + COMMAND ${PYTHON_EXECUTABLE} ${tensorflow_source_dir}/tensorflow/tools/git/gen_git_source.py + ARGS --raw_generate ${VERSION_INFO_CC} --source_dir ${tensorflow_source_dir} --git_tag_override=${GIT_TAG_OVERRIDE} + DEPENDS __force_rebuild) +endif() + set(tf_version_srcs ${tensorflow_source_dir}/tensorflow/core/util/version_info.cc) ######################################################## -- GitLab From 29f596cf21f0332c1e2ece8798fdd9fefd2ba947 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 14:04:59 +0000 Subject: [PATCH 004/570] Improve the shape function of Bincount There was not a lot of restriction in shape function of Bincount and the output shape was unknown. It is actually possible to get a better shape output if `size` input is known. This fix adds enhancement to the shape function of Bincount. 
Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 1667c398f4..7d0f29368b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1416,6 +1416,10 @@ REGISTER_OP("Bincount") .Attr("T: {int32, int64, float32, float64}") .Output("bins: T") .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + // The input `size` must be a scalar. + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + c->set_output(0, c->UnknownShapeOfRank(1)); return Status::OK(); }); -- GitLab From 740c58b6fa5b6e1c85f688fbda322da0231aa169 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Mon, 4 Jun 2018 14:44:44 +0000 Subject: [PATCH 005/570] Return `[size]` shape if size is known for Bincount. Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 7d0f29368b..b57385f63b 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1420,7 +1420,19 @@ REGISTER_OP("Bincount") // The input `size` must be a scalar. TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - c->set_output(0, c->UnknownShapeOfRank(1)); + const Tensor* size_tensor = c->input_tensor(1); + if (size_tensor == nullptr) { + // Return unknown shape if size is not known. + c->set_output(0, c->UnknownShapeOfRank(1)); + return Status::OK(); + } + + // Return `[size]` shape if size is known. 
+ int32 size_val = size_tensor->scalar()(); + if (size_val < 0) { + return errors::InvalidArgument("size (", size_val, ") must be non-negative"); + } + c->set_output(0, c->MakeShape({size_val})); return Status::OK(); }); -- GitLab From e6981fc2225a529427391e98f492eee7bb865988 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 11 Aug 2018 18:39:13 +0000 Subject: [PATCH 006/570] Add additional test cases for Bincount Shape function, and fix clang-format issue Signed-off-by: Yong Tang --- tensorflow/core/ops/math_ops.cc | 3 ++- tensorflow/core/ops/math_ops_test.cc | 12 ++++++++++++ .../python/kernel_tests/bincount_op_test.py | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index b57385f63b..0ba4a9a005 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -1430,7 +1430,8 @@ REGISTER_OP("Bincount") // Return `[size]` shape if size is known. int32 size_val = size_tensor->scalar()(); if (size_val < 0) { - return errors::InvalidArgument("size (", size_val, ") must be non-negative"); + return errors::InvalidArgument("size (", size_val, + ") must be non-negative"); } c->set_output(0, c->MakeShape({size_val})); return Status::OK(); diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index 23f1538912..7bf7c476f4 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -558,4 +558,16 @@ TEST(MathOpsTest, QuantizedAdd_ShapeFn) { INFER_ERROR("must be rank 0", op, "?;?;?;?;[3];?"); INFER_ERROR("must be rank 0", op, "?;?;?;?;?;[4]"); } + +TEST(MathOpsTest, Bincount_ShapeFn) { + ShapeInferenceTestOp op("Bincount"); + + // size should be scalar. 
+ INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;[1];?"); + + INFER_OK(op, "?;?;?", "[?]"); + INFER_OK(op, "?;[];?", "[?]"); + INFER_OK(op, "[?];[];?", "[?]"); + INFER_OK(op, "[?];[];[?]", "[?]"); +} } // end namespace tensorflow diff --git a/tensorflow/python/kernel_tests/bincount_op_test.py b/tensorflow/python/kernel_tests/bincount_op_test.py index 2767df127e..15d9de56db 100644 --- a/tensorflow/python/kernel_tests/bincount_op_test.py +++ b/tensorflow/python/kernel_tests/bincount_op_test.py @@ -22,6 +22,8 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -97,6 +99,23 @@ class BincountTest(test_util.TensorFlowTestCase): with self.assertRaises(errors.InvalidArgumentError): math_ops.bincount([1, 2, 3, -1, 6, 8]).eval() + def test_shape_function(self): + # size must be scalar. + with self.assertRaisesRegexp( + ValueError, "Shape must be rank 0 but is rank 1 for 'Bincount'"): + gen_math_ops.bincount([1, 2, 3, -1, 6, 8], [1], []) + # size must be non-negative. + with self.assertRaisesRegexp( + ValueError, "must be non-negative"): + gen_math_ops.bincount([1, 2, 3, -1, 6, 8], -5, []) + # if size is a constant then the shape is known. + v1 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], 5, []) + self.assertAllEqual(v1.get_shape().as_list(), [5]) + # if size is a placeholder then the shape is unknown. 
+ s = array_ops.placeholder(dtype=dtypes.int32) + v2 = gen_math_ops.bincount([1, 2, 3, -1, 6, 8], s, []) + self.assertAllEqual(v2.get_shape().as_list(), [None]) + if __name__ == "__main__": googletest.main() -- GitLab From aa25cc078c9b55e5ca3e0f59df43e169bfee8f3c Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Thu, 16 Aug 2018 19:04:37 +0800 Subject: [PATCH 007/570] Add LeakyRelu C++ Op and its gradient implementation. LeakyRelu, defined as 'y = { x (x>=0) or alpha*x (x<0) }', is computed by the combined Ops 'max(x, alpha*x)' in the current code. Hence its gradient calculation for back propagation would contain a series of element-wise Ops. This looks really unnecessary for such a simple op and it could be done within just one Op with fewer memory accesses. --- tensorflow/cc/gradients/nn_grad.cc | 13 ++ tensorflow/cc/gradients/nn_grad_test.cc | 13 ++ tensorflow/core/kernels/relu_op.cc | 153 +++++++++++------- tensorflow/core/kernels/relu_op.h | 59 +++++++ tensorflow/core/kernels/relu_op_functor.h | 31 ++++ tensorflow/core/kernels/relu_op_gpu.cu.cc | 18 ++- tensorflow/core/ops/nn_ops.cc | 15 ++ tensorflow/core/ops/ops.pbtxt | 68 ++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 2 + .../python/kernel_tests/relu_op_test.py | 113 +++++++++++++ tensorflow/python/ops/nn_grad.py | 15 ++ tensorflow/python/ops/nn_ops.py | 3 +- 12 files changed, 432 insertions(+), 71 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 588e96cb19..0fc23d0bf7 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -143,6 +143,19 @@ Status Relu6GradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Relu6", Relu6GradHelper); +Status LeakyReluGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + 
attrs.Alpha(alpha); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index aa72cf7ba2..5ebece7b6e 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -41,6 +41,7 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; +using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -160,6 +161,18 @@ TEST_F(NNGradTest, Relu6Grad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + auto y = LeakyRelu(scope_, x); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). 
+ Tensor x_init_value = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index d52358737f..c4f2ef5632 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -33,19 +33,25 @@ typedef Eigen::GpuDevice GPUDevice; typedef Eigen::SyclDevice SYCLDevice; #endif // TENSORFLOW_USE_SYCL -#define REGISTER_RELU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ - Relu6GradOp) +#define REGISTER_RELU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_CPU).TypeConstraint("T"), \ + Relu6GradOp) \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + LeakyReluGradOp); TF_CALL_REAL_NUMBER_TYPES(REGISTER_RELU_KERNELS); #undef REGISTER_RELU_KERNELS @@ -99,6 +105,19 @@ namespace functor { extern template struct Relu6Grad; \ \ template <> \ + void LeakyRelu::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor 
features, \ + T alpha, typename TTypes::Tensor activations); \ + extern template struct LeakyRelu; \ + \ + template <> \ + void LeakyReluGrad::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor gradients, \ + typename TTypes::ConstTensor features, \ + T alpha, typename TTypes::Tensor backprops); \ + extern template struct LeakyReluGrad; \ + \ + template <> \ void Elu::operator()(const GPUDevice& d, \ typename TTypes::ConstTensor features, \ typename TTypes::Tensor activations); \ @@ -128,30 +147,36 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); } // namespace functor // Registration of the GPU implementations. -#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ +#define REGISTER_GPU_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6Op); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_GPU).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + 
Name("LeakyRelu").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_GPU).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_GPU).TypeConstraint("T"), \ SeluGradOp) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); @@ -161,30 +186,36 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); #ifdef TENSORFLOW_USE_SYCL // Registration of the GPU implementations. -#define REGISTER_SYCL_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - ReluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6Op); \ - REGISTER_KERNEL_BUILDER( \ - Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - Relu6GradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ - EluGradOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ - SeluOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ +#define REGISTER_SYCL_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("ReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + ReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Relu6").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6Op); \ + 
REGISTER_KERNEL_BUILDER( \ + Name("Relu6Grad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + Relu6GradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyRelu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("LeakyReluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + LeakyReluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Elu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("EluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ + EluGradOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Selu").Device(DEVICE_SYCL).TypeConstraint("T"), \ + SeluOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("SeluGrad").Device(DEVICE_SYCL).TypeConstraint("T"), \ SeluGradOp) TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNELS); diff --git a/tensorflow/core/kernels/relu_op.h b/tensorflow/core/kernels/relu_op.h index e712b02bd7..c55190065c 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -131,6 +131,65 @@ void Relu6GradOp::OperateNoTemplate(OpKernelContext* context, output->flat()); } +template +class LeakyReluOp : public UnaryElementWiseOp> { + public: + explicit LeakyReluOp(OpKernelConstruction* context) + : UnaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { + functor::LeakyRelu functor; + functor(context->eigen_device(), input.flat(), + alpha_, output->flat()); + } + + private: + T alpha_; +}; + +template +class LeakyReluGradOp + : public BinaryElementWiseOp> { + public: + explicit LeakyReluGradOp(OpKernelConstruction* context) + : BinaryElementWiseOp>(context) { + float alpha_tmp; + OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_tmp)); + alpha_ = T(alpha_tmp); + } + + void OperateNoTemplate(OpKernelContext* context, const Tensor& g, + const Tensor& a, T alpha, Tensor* output); + 
+ // INPUTS: + // g (gradients): backpropagated gradients + // a (inputs): either the inputs that were passed to LeakyReluOp(), or its + // outputs (using either one yields the same result here). + // OUTPUT: + // gradients to backprop + template + void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a, + Tensor* output) { + OperateNoTemplate(context, g, a, alpha_, output); + } + + private: + T alpha_; +}; + +template +void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, + const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + if (!ReluHelpers::ValidateSameSize(context, g, a)) return; + functor::LeakyReluGrad functor; + functor(context->eigen_device(), g.flat(), a.flat(), alpha, + output->flat()); +}; + template class EluOp : public UnaryElementWiseOp> { public: diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 3bc5ba8a50..7f0951451d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,6 +91,37 @@ struct Relu6Grad { } }; + +// Functor used by LeakyReluOp to do the computations. +template +struct LeakyRelu { + // Computes LeakyRelu activation. + // + // features: any shape. + // activations: same shape as "features". + void operator()(const Device& d, typename TTypes::ConstTensor features, + T alpha, typename TTypes::Tensor activations) { + activations.device(d) = features.cwiseMax(features * alpha); + } +}; + +// Functor used by LeakyReluGradOp to do the computations. +template +struct LeakyReluGrad { + // Computes LeakyReluGrad backprops. + // + // gradients: gradients backpropagated to the LeakyRelu op. + // features: either the inputs that were passed to the LeakyRelu op, or its + // outputs (using either one yields the same result here). + // backprops: gradients to backpropagate to the LeakyRelu inputs. 
+ void operator()(const Device& d, typename TTypes::ConstTensor gradients, + typename TTypes::ConstTensor features, T alpha, + typename TTypes::Tensor backprops) { + backprops.device(d) = + (features > static_cast(0)).select(gradients, gradients * alpha); + } +}; + // Functor used by EluOp to do the computations. template struct Elu { diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index 089ca8ed27..4452f4dcc9 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -114,14 +114,16 @@ struct ReluGrad { } // namespace functor // Definition of the GPU implementations declared in relu_op.cc. -#define DEFINE_GPU_KERNELS(T) \ - template struct functor::Relu; \ - template struct functor::ReluGrad; \ - template struct functor::Relu6; \ - template struct functor::Relu6Grad; \ - template struct functor::Elu; \ - template struct functor::EluGrad; \ - template struct functor::Selu; \ +#define DEFINE_GPU_KERNELS(T) \ + template struct functor::Relu; \ + template struct functor::ReluGrad; \ + template struct functor::Relu6; \ + template struct functor::Relu6Grad; \ + template struct functor::LeakyRelu; \ + template struct functor::LeakyReluGrad; \ + template struct functor::Elu; \ + template struct functor::EluGrad; \ + template struct functor::Selu; \ template struct functor::SeluGrad; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index e0f25fb4ef..023f988f80 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -983,6 +983,21 @@ REGISTER_OP("Relu6Grad") .Attr("T: realnumbertype") .SetShapeFn(shape_inference::MergeBothInputsShapeFn); +REGISTER_OP("LeakyRelu") + .Input("features: T") + .Output("activations: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::UnchangedShape); + +REGISTER_OP("LeakyReluGrad") + 
.Input("gradients: T") + .Input("features: T") + .Output("backprops: T") + .Attr("alpha: float = 0.2") + .Attr("T: {half, float, double} = DT_FLOAT") + .SetShapeFn(shape_inference::MergeBothInputsShapeFn); + REGISTER_OP("Elu") .Input("features: T") .Output("activations: T") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index f2595279e0..837e91bc23 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13604,6 +13604,74 @@ op { minimum: 1 } } +op { + name: "LeakyRelu" + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "activations" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} +op { + name: "LeakykReluGrad" + input_arg { + name: "gradients" + type_attr: "T" + } + input_arg { + name: "features" + type_attr: "T" + } + output_arg { + name: "backprops" + type_attr: "T" + } + attr { + name: "alpha" + type: "float" + default_value { + f: 0.2 + } + } + attr { + name: "T" + type: "type" + default_value { + type: DT_FLOAT + } + allowed_values { + list { + type: DT_HALF + type: DT_FLOAT + type: DT_DOUBLE + } + } + } +} op { name: "LearnedUnigramCandidateSampler" input_arg { diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 2d54555cd3..9b3b5fd7aa 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", "Dilation2D", @@ -1799,6 +1800,7 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", + "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", diff --git 
a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 25e947f09e..ccb3a231bb 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -252,6 +252,119 @@ class Relu6Test(test.TestCase): self.assertLess(err, 1e-10) +class LeakyReluTest(test.TestCase): + + def _npLeakyRelu(self, np_features, alpha=0.1): + return np.maximum(np_features, alpha * np_features) + + def testNpLeakyRelu(self): + self.assertAllClose( + np.array([[-0.09, 0.7, -0.05, 0.3, -0.01], + [0.1, -0.03, 0.5, -0.07, 0.9]]), + self._npLeakyRelu( + np.array([[-0.9, 0.7, -0.5, 0.3, -0.1], [0.1, -0.3, 0.5, -0.7, 0.9] + ]), alpha=0.1)) + + def _testLeakyRelu(self, np_features, alpha, use_gpu=False): + np_leaky_relu = self._npLeakyRelu(np_features, alpha) + with self.test_session(use_gpu=use_gpu): + leaky_relu = nn_ops.leaky_relu(np_features, alpha) + tf_leaky_relu = leaky_relu.eval() + self.assertAllClose(np_leaky_relu, tf_leaky_relu) + self.assertShapeEqual(np_leaky_relu, leaky_relu) + + def testNumbers(self): + for t in [np.int32, np.int64, np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.2, use_gpu=False) + if t in [np.float16, np.float32, np.float64]: + self._testLeakyRelu( + np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), + alpha=0.1, use_gpu=True) + + # The gradient test for ReLU is a bit tricky as the derivative is not well + # defined at around zero and we want to avoid that in terms of input values. 
+ def testGradientFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradientFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.2, name="leaky_relu") + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], y, [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradGradFloat32(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) + + def testGradGradFloat64(self): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, 
-0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) + + def testGradientScalar(self): + with self.test_session() as sess: + x = variables.Variable(-100.) + y = nn_ops.leaky_relu(x, 0.05) + loss = y**2 + optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=0.2) + train_op = optimizer.minimize(loss) + sess.run(variables.global_variables_initializer()) + sess.run(train_op) + self.assertAllClose(x.eval(), -99.9) + + class EluTest(test.TestCase): def _npElu(self, np_features): diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index df23ac55ce..c2dd58bdf0 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -390,6 +390,21 @@ def _Relu6GradGrad(op, grad): array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) +@ops.RegisterGradient("LeakyRelu") +def _LeakyReluGrad(op, grad): + x = op.inputs[0] + alpha = op.get_attr("alpha") + return gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha) + + +@ops.RegisterGradient("LeakyReluGrad") +def _LeakyReluGradGrad(op, grad): + x = op.inputs[1] + alpha = op.get_attr("alpha") + return (gen_nn_ops.leaky_relu_grad(grad, x, alpha=alpha), + array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) + + @ops.RegisterGradient("Elu") def _EluGrad(op, grad): return gen_nn_ops.elu_grad(grad, op.outputs[0]) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 6fd1273687..31b8f3945d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,8 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - alpha = ops.convert_to_tensor(alpha, 
dtype=features.dtype, name="alpha") - return math_ops.maximum(alpha * features, features, name=name) + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) def _flatten_outer_dims(logits): -- GitLab From cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Fri, 24 Aug 2018 11:51:34 +0800 Subject: [PATCH 008/570] Refine LeakyRelu codes and update APIs. --- .../api_def/base_api/api_def_LeakyRelu.pbtxt | 4 ++++ .../base_api/api_def_LeakyReluGrad.pbtxt | 24 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 2 +- tensorflow/python/eager/pywrap_tfe_src.cc | 2 +- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt new file mode 100644 index 0000000000..4a61889f54 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LeakyRelu" + summary: "Computes rectified linear: `max(features, features * alpha)`." +} diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt new file mode 100644 index 0000000000..e427526602 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_LeakyReluGrad.pbtxt @@ -0,0 +1,24 @@ +op { + graph_op_name: "LeakyReluGrad" + visibility: HIDDEN + in_arg { + name: "gradients" + description: <<END +The backpropagated gradients to the corresponding LeakyRelu operation. +END + } + in_arg { + name: "features" + description: <<END +The features passed as input to the corresponding LeakyRelu operation, +OR the outputs of that operation (both work equivalently). +END + } + out_arg { + name: "backprops" + description: <<END +`gradients * (features > 0) + alpha * gradients * (features <= 0)`. +END + } + summary: "Computes rectified linear gradients for a LeakyRelu operation."
+} diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 837e91bc23..7693c2d485 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -13637,7 +13637,7 @@ op { } } op { - name: "LeakykReluGrad" + name: "LeakyReluGrad" input_arg { name: "gradients" type_attr: "T" diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9b3b5fd7aa..18fafd0de1 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) { "SoftplusGrad", "Softsign", "ReluGrad", + "LeakyRelu", "LeakyReluGrad", "Conv2D", "DepthwiseConv2dNative", @@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) { "BiasAdd", "Relu", "Relu6", - "LeakyRelu", "Elu", "Selu", "SparseSoftmaxCrossEntropyWithLogits", -- GitLab From 7a54c15804f7bb0d0c40fea5c84b1f4acee58bac Mon Sep 17 00:00:00 2001 From: Stefan Dyulgerov Date: Sat, 25 Aug 2018 13:18:11 +0300 Subject: [PATCH 009/570] upgraded protobuf to v.3.6.1 --- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index f56fb35a0f..56a57a2340 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG v3.6.0) +set(PROTOBUF_TAG v3.6.1) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") -- GitLab From e93a9f9ccfd9c7a2419bf3fc1d7866765bbcfce3 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 28 Aug 2018 18:55:51 -0700 Subject: [PATCH 010/570] Update GPU occupancy checking to utilize CUDA's occupancy calculator functions 
-Replace references to the UnqueryableDeviceParams struct with calls to CUDA's built-in occupancy calculation functions -Update calls to the occupancy checking functions with the new changes -Changes should provide more long-term reliability and will remove the need to manually update hardcoded data values for new GPU architectures --- .../xla/service/gpu/partition_assignment.cc | 9 +- .../stream_executor/cuda/cuda_gpu_executor.cc | 192 ++---------------- .../stream_executor/device_description.cc | 98 +++------ .../stream_executor/device_description.h | 73 ++----- 4 files changed, 61 insertions(+), 311 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc index cf9f102d31..375f68a159 100644 --- a/tensorflow/compiler/xla/service/gpu/partition_assignment.cc +++ b/tensorflow/compiler/xla/service/gpu/partition_assignment.cc @@ -62,13 +62,8 @@ LaunchDimensions CalculateLaunchDimensions( // // * = - auto threads_per_core = device_desc.threads_per_core_limit(); - auto blocks_per_core = device_desc.blocks_per_core_limit(); - int64 threads_per_block; - if (threads_per_core != 0 && blocks_per_core != 0) { - threads_per_block = device_desc.threads_per_core_limit() / - device_desc.blocks_per_core_limit(); - } else { + int64 threads_per_block = device_desc.threads_per_block_limit(); + if (threads_per_block == 0) { static std::atomic log_count{0}; if (log_count.fetch_add(1) < 8) { LOG(WARNING) << "Attempting to calculate launch dimensions for GPU " diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index e30f50ea2a..39b0696c93 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -467,33 +467,26 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, return; } + int block_size = thread_dims.x * thread_dims.y * thread_dims.z; + const 
DeviceDescription &device_description = kernel.parent()->GetDeviceDescription(); - uint64 blocks_per_sm = CalculateOccupancy( - device_description, regs_per_thread, smem_per_block, thread_dims); - VLOG(2) << "Resident blocks per SM is " << blocks_per_sm; + const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel); + CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue(); - // To increase occupancy, there must be a sufficient number of blocks - // available to spread across the sm's at this new improved occupancy level. - int multiprocessor_count = device_description.core_count(); - int block_count = block_dims.x * block_dims.y * block_dims.z; - int available_blocks_per_sm = - port::MathUtil::CeilOfRatio(block_count, multiprocessor_count); - if (available_blocks_per_sm <= static_cast(blocks_per_sm)) { - VLOG(2) << "Occupancy is limited by number of blocks available per sm."; - return; - } + int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread, + smem_per_block, thread_dims, cufunc); + VLOG(2) << "Resident blocks per SM is " << blocks_per_sm; - uint64 improved_regs_per_thread = CalculateRegisterLimitForTargetOccupancy( - device_description, smem_per_block, thread_dims, blocks_per_sm + 1); - if (improved_regs_per_thread != 0) { - VLOG(2) << "Reducing register usage from " << regs_per_thread - << " to " << improved_regs_per_thread - << " could increase resident blocks per SM by one."; - } else { - VLOG(2) << "Resident blocks per SM cannot be increased by reducing " - "register usage."; + int suggested_threads = + CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread, + smem_per_block, thread_dims, cufunc); + if (suggested_threads != 0) { + VLOG(2) << "The cuda occupancy calculator reccommends using " + << suggested_threads + << " threads per block to acheive an occupancy of " << blocks_per_sm + << " blocks per SM."; } } @@ -980,144 +973,6 @@ static int TryToReadNumaNode(const string &pci_bus_id, int device_ordinal) { #endif } -// Set of 
compute capability specific device parameters that cannot be -// queried from the driver API. These values instead are baked into a -// lookup table indexed by compute capability version. -struct UnqueryableDeviceParams { - int cc_major; - int cc_minor; - uint64 blocks_per_core_limit; - uint64 registers_per_core_limit; - uint64 registers_per_thread_limit; - uint64 warp_alloc_granularity; - uint64 register_alloc_granularity; - uint64 shared_memory_alloc_granularity; -}; - -// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#compute-capabilities -// https://developer.download.nvidia.com/compute/cuda/CUDA_Occupancy_calculator.xls -static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = { - { - 2, 0, // compute capability (2.0) - 8, // blocks_per_core_limit - 32 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 2, // warp_alloc_granularity - 64, // register_alloc_granularity - 128, // shared_memory_alloc_granularity - }, - { - 2, 1, // compute capability (2.1) - 8, // blocks_per_core_limit - 32 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 2, // warp_alloc_granularity - 64, // register_alloc_granularity - 128, // shared_memory_alloc_granularity - }, - { - 3, 0, // compute capability (3.0) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 63, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 3, 2, // compute capability (3.2) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 3, 5, // compute capability (3.5) - 16, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // 
shared_memory_alloc_granularity - }, - { - 3, 7, // compute capability (3.7) - 16, // blocks_per_core_limit - 128 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 0, // compute capability (5.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 2, // compute capability (5.2) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 5, 3, // compute capability (5.3) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 0, // compute capability (6.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 2, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 1, // compute capability (6.1) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - { - 6, 2, // compute capability (6.2) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 4, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, - // TODO(jlebar): Confirm the alloc granularity values for sm_70. 
These are - // not published in the spreadsheet linked above. Currently we guess that - // they're the same as sm_60. - { - 7, 0, // compute capability (7.0) - 32, // blocks_per_core_limit - 64 * 1024, // registers_per_core_limit - 255, // registers_per_thread_limit - 2, // warp_alloc_granularity - 256, // register_alloc_granularity - 256, // shared_memory_alloc_granularity - }, -}; DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { internal::DeviceDescriptionBuilder builder; @@ -1193,19 +1048,6 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { - const auto &params = kAllUnqueryableDeviceParams[i]; - if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { - builder.set_blocks_per_core_limit(params.blocks_per_core_limit); - builder.set_registers_per_core_limit(params.registers_per_core_limit); - builder.set_registers_per_thread_limit(params.registers_per_thread_limit); - builder.set_warp_alloc_granularity(params.warp_alloc_granularity); - builder.set_register_alloc_granularity(params.register_alloc_granularity); - builder.set_shared_memory_alloc_granularity( - params.shared_memory_alloc_granularity); - } - } - builder.set_platform_version( port::StrCat("Compute Capability ", cc_major_, ".", cc_minor_)); @@ -1227,6 +1069,10 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { CUDADriver::GetMaxRegistersPerBlock(device_).ValueOrDie()); builder.set_threads_per_warp( CUDADriver::GetThreadsPerWarp(device_).ValueOrDie()); + builder.set_registers_per_core_limit( + CUDADriver::GetDeviceAttribute( + CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR, device_) + .ValueOrDie()); auto built = builder.Build(); return built.release(); diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index 8ca0677f8a..df52ce6cce 100644 ---
a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -37,16 +37,11 @@ DeviceDescription::DeviceDescription() kUninitializedUint64), block_dim_limit_(kUninitializedUint64, kUninitializedUint64, kUninitializedUint64), - blocks_per_core_limit_(kUninitializedUint64), threads_per_core_limit_(kUninitializedUint64), threads_per_block_limit_(kUninitializedUint64), threads_per_warp_(kUninitializedUint64), registers_per_core_limit_(kUninitializedUint64), registers_per_block_limit_(kUninitializedUint64), - registers_per_thread_limit_(kUninitializedUint64), - warp_alloc_granularity_(1), - register_alloc_granularity_(1), - shared_memory_alloc_granularity_(1), device_address_bits_(kUninitializedUint64), device_memory_size_(kUninitializedUint64), memory_bandwidth_(kUninitializedUint64), @@ -162,75 +157,36 @@ static uint64 RoundDown(uint64 value, uint64 n) { return port::MathUtil::FloorOfRatio(value, n) * n; } -uint64 CalculateOccupancy(const DeviceDescription &device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim &thread_dims) { - // Don't try to compute occupancy if necessary values are not initialized. - uint64 required_fields[] = { device_description.registers_per_thread_limit(), - device_description.threads_per_warp(), - device_description.warp_alloc_granularity(), - device_description.register_alloc_granularity(), - device_description.registers_per_block_limit(), - device_description.shared_memory_per_core(), - device_description.blocks_per_core_limit() }; - for (auto value : required_fields) { - if (value == kUninitializedUint64) { - return 0; - } - } - - if (registers_per_thread > device_description.registers_per_thread_limit()) { - return 0; - } - - uint64 warps_per_block = - port::MathUtil::CeilOfRatio(thread_dims.x * thread_dims.y * thread_dims.z, - device_description.threads_per_warp()); - - // Warp resources are allocated at a particular granularity. 
This value is - // the effective number of warps for resource allocation purposes. - uint64 alloc_warps_per_block = - RoundUp(warps_per_block, device_description.warp_alloc_granularity()); - - uint64 alloc_regs_per_warp = - RoundUp(device_description.threads_per_warp() * registers_per_thread, - device_description.register_alloc_granularity()); - uint64 regs_per_block = alloc_warps_per_block * alloc_regs_per_warp; - uint64 reg_limit = - device_description.registers_per_block_limit() / regs_per_block; - - uint64 alloc_smem_per_block = RoundUp( - shared_memory_per_block, - device_description.shared_memory_alloc_granularity()); - uint64 smem_limit = alloc_smem_per_block > 0 ? - device_description.shared_memory_per_core() / alloc_smem_per_block : - device_description.blocks_per_core_limit(); - - uint64 thread_limit = device_description.threads_per_core_limit() - / (warps_per_block * device_description.threads_per_warp()); - - return std::min({ device_description.blocks_per_core_limit(), - reg_limit, smem_limit, thread_limit }); +int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + return suggested_blocks; } -uint64 CalculateRegisterLimitForTargetOccupancy( - const DeviceDescription &device_description, uint64 shared_memory_per_block, - const ThreadDim &thread_dims, uint64 target_blocks_per_core) { - // Linear search from maximum number of registers down until the target - // blocks per SM is found. - // TODO(meheff): Compute this using a closed form solution. 
- int reg_step = device_description.register_alloc_granularity() / - device_description.threads_per_warp(); - for (int r = device_description.registers_per_thread_limit(); r > 0; - r = RoundDown(r - 1, reg_step)) { - uint64 occupancy = CalculateOccupancy( - device_description, r, shared_memory_per_block, thread_dims); - if (occupancy >= target_blocks_per_core) { - return r; - } +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + if (suggested_blocks > *initial_blocks) { + *initial_blocks = suggested_blocks; + return suggested_threads; + } else { + return 0; } - return 0; } } // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index 7f99d81ef3..d335b9b875 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ -24,6 +24,7 @@ limitations under the License. #include #include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/platform/port.h" @@ -79,10 +80,6 @@ class DeviceDescription { // legitimate kernel launch request. const BlockDim &block_dim_limit() const { return block_dim_limit_; } - // Returns the limit on the number of simultaneously resident blocks - // on a multiprocessor. - uint64 blocks_per_core_limit() const { return blocks_per_core_limit_; } - // Returns the limit on the total number of threads that can be launched in a // single block; i.e. the limit on x * y * z dimensions of a ThreadDim. 
// This limit affects what constitutes a legitimate kernel launch request. @@ -110,27 +107,6 @@ class DeviceDescription { return registers_per_block_limit_; } - // Returns the limit on the total number of registers that can be - // allocated to a thread. - const uint64 &registers_per_thread_limit() const { - return registers_per_thread_limit_; - } - - // Returns the granularity at which warps are allocated resources. - const uint64 &warp_alloc_granularity() const { - return warp_alloc_granularity_; - } - - // Returns the granularity at which registers are allocated to warps. - const uint64 &register_alloc_granularity() const { - return register_alloc_granularity_; - } - - // Returns the granularity at which shared memory is allocated to warps. - const uint64 &shared_memory_alloc_granularity() const { - return shared_memory_alloc_granularity_; - } - // Returns the number of address bits available to kernel code running on the // platform. This affects things like the maximum allocation size and perhaps // types used in kernel code such as size_t.
@@ -200,19 +176,12 @@ class DeviceDescription { ThreadDim thread_dim_limit_; BlockDim block_dim_limit_; - uint64 blocks_per_core_limit_; - uint64 threads_per_core_limit_; uint64 threads_per_block_limit_; uint64 threads_per_warp_; uint64 registers_per_core_limit_; uint64 registers_per_block_limit_; - uint64 registers_per_thread_limit_; - - uint64 warp_alloc_granularity_; - uint64 register_alloc_granularity_; - uint64 shared_memory_alloc_granularity_; uint64 device_address_bits_; uint64 device_memory_size_; @@ -270,10 +239,6 @@ class DeviceDescriptionBuilder { device_description_->block_dim_limit_ = value; } - void set_blocks_per_core_limit(uint64 value) { - device_description_->blocks_per_core_limit_ = value; - } - void set_threads_per_core_limit(uint64 value) { device_description_->threads_per_core_limit_ = value; } @@ -290,19 +255,6 @@ class DeviceDescriptionBuilder { void set_registers_per_block_limit(uint64 value) { device_description_->registers_per_block_limit_ = value; } - void set_registers_per_thread_limit(uint64 value) { - device_description_->registers_per_thread_limit_ = value; - } - - void set_warp_alloc_granularity(uint64 value) { - device_description_->warp_alloc_granularity_ = value; - } - void set_register_alloc_granularity(uint64 value) { - device_description_->register_alloc_granularity_ = value; - } - void set_shared_memory_alloc_granularity(uint64 value) { - device_description_->shared_memory_alloc_granularity_ = value; - } void set_device_address_bits(uint64 value) { device_description_->device_address_bits_ = value; @@ -375,17 +327,18 @@ void CalculateDimensionality(const DeviceDescription &device_description, // Compute and return maximum blocks per core (occupancy) based on the // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. 
-uint64 CalculateOccupancy(const DeviceDescription &device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim &thread_dims); - -// Compute and return the maximum number of registers per thread which -// achieves the target occupancy. If the target is not possible then -// zero is returned. -uint64 CalculateRegisterLimitForTargetOccupancy( - const DeviceDescription &device_description, uint64 shared_memory_per_block, - const ThreadDim &thread_dims, uint64 target_blocks_per_core); +int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + +// Compute and return the suggested thread count to achieve ideal occupancy. +// If the provided thread dimensions match this number, zero is returned. +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); } // namespace stream_executor -- GitLab From 4e72dd865a3fc83baa69f6b7c08720a1b546a464 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 29 Aug 2018 17:05:43 +0800 Subject: [PATCH 011/570] Refine LeakyRelu codes. 1. Add C++ gradient of gradient definition of LeakyReLu and relevant UT. 2. Using forward compatibility layer for python code changes.
--- tensorflow/cc/gradients/nn_grad.cc | 18 ++++- tensorflow/cc/gradients/nn_grad_test.cc | 16 +++++ .../python/kernel_tests/relu_op_test.py | 70 ++++++++++--------- tensorflow/python/ops/nn_ops.py | 5 +- 4 files changed, 73 insertions(+), 36 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 0fc23d0bf7..2a32a2ed6f 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -149,13 +149,27 @@ Status LeakyReluGradHelper(const Scope& scope, const Operation& op, float alpha; TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); internal::LeakyReluGrad::Attrs attrs; - attrs.Alpha(alpha); - auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), attrs); + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(0), + attrs.Alpha(alpha)); grad_outputs->push_back(dx); return scope.status(); } REGISTER_GRADIENT_OP("LeakyRelu", LeakyReluGradHelper); +Status LeakyReluGradGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + float alpha; + TF_RETURN_IF_ERROR(GetNodeAttr(op.node()->attrs(), "alpha", &alpha)); + internal::LeakyReluGrad::Attrs attrs; + auto dx = internal::LeakyReluGrad(scope, grad_inputs[0], op.input(1), + attrs.Alpha(alpha)); + grad_outputs->push_back(dx); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("LeakyReluGrad", LeakyReluGradGradHelper); + Status EluGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index 5ebece7b6e..bf0db1f59d 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/cc/framework/gradient_checker.h" #include "tensorflow/cc/framework/testutil.h" #include "tensorflow/cc/gradients/grad_testutil.h" +#include "tensorflow/cc/ops/nn_ops_internal.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -173,6 +174,21 @@ TEST_F(NNGradTest, LeakyReluGrad) { RunTest(x, x_init_value, y, shape); } +TEST_F(NNGradTest, LeakyReluGradGrad) { + TensorShape shape({5, 2}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); + // Avoid input values where Leaky ReLU gradient is not well defined (around + // zero). + Tensor x_init_value = test::AsTensor( + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, + {5, 2}); + Tensor features = test::AsTensor( + {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, + {5, 2}); + auto y = ops::internal::LeakyReluGrad(scope_, x, features); + RunTest(x, x_init_value, y, shape); +} + TEST_F(NNGradTest, EluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index ccb3a231bb..7066f28883 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.compat import compat from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops @@ -283,8 +284,9 @@ class LeakyReluTest(test.TestCase): np.array([[-9, 7, -5, 3, -1], [1, -3, 5, -7, 9]]).astype(t), alpha=0.1, use_gpu=True) - # The gradient test for ReLU is a bit tricky as the derivative is not well - # defined at around zero and we want to avoid that in 
terms of input values. + # The gradient test for Leaky ReLU is a bit tricky as the derivative is not + # well defined at around zero and we want to avoid that in terms of input + # values. def testGradientFloat32(self): with self.test_session(): x = constant_op.constant( @@ -319,39 +321,41 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float32) gradient of gradient err = ", err) - self.assertLess(err, 1e-4) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float32) gradient of gradient err = ", err) + self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = 
gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) - print("leaky_relu (float64) gradient of gradient err = ", err) - self.assertLess(err, 1e-10) + with compat.forward_compatibility_horizon(2018, 10, 2): + with self.test_session(): + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) + print("leaky_relu (float64) gradient of gradient err = ", err) + self.assertLess(err, 1e-10) def testGradientScalar(self): with self.test_session() as sess: diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 31b8f3945d..52ea202636 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,7 +1601,10 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + if compat.forward_compatible(2018, 10, 1): + return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) + alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") + return math_ops.maximum(alpha * features, features, name=name) def _flatten_outer_dims(logits): -- GitLab From 2586eb3bfeeef3af357e438ae5aff92d2bac12a5 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 11:48:35 +0800 Subject: [PATCH 012/570] Code fix against ci_build error results. 
--- tensorflow/cc/gradients/nn_grad_test.cc | 3 +- tensorflow/core/kernels/relu_op.cc | 8 +-- tensorflow/core/kernels/relu_op.h | 8 +-- tensorflow/core/kernels/relu_op_functor.h | 1 - .../python/kernel_tests/relu_op_test.py | 50 +++++++++---------- .../tools/api/golden/v1/tensorflow.pbtxt | 4 ++ 6 files changed, 39 insertions(+), 35 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index bf0db1f59d..d8c2a1a0fc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -180,8 +180,7 @@ TEST_F(NNGradTest, LeakyReluGradGrad) { // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). Tensor x_init_value = test::AsTensor( - {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, - {5, 2}); + {2.3f, 1.9f, 1.5f, 1.1f, 0.7f, 0.3f, -0.1f, -0.5f, -0.9f, -1.3f}, {5, 2}); Tensor features = test::AsTensor( {-0.9f, -0.7f, -0.5f, -0.3f, -0.1f, 0.1f, 0.3f, 0.5f, 0.7f, 0.9f}, {5, 2}); diff --git a/tensorflow/core/kernels/relu_op.cc b/tensorflow/core/kernels/relu_op.cc index c4f2ef5632..cafa49cbb6 100644 --- a/tensorflow/core/kernels/relu_op.cc +++ b/tensorflow/core/kernels/relu_op.cc @@ -106,15 +106,15 @@ namespace functor { \ template <> \ void LeakyRelu::operator()( \ - const GPUDevice& d, typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor activations); \ + const GPUDevice& d, typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor activations); \ extern template struct LeakyRelu; \ \ template <> \ void LeakyReluGrad::operator()( \ const GPUDevice& d, typename TTypes::ConstTensor gradients, \ - typename TTypes::ConstTensor features, \ - T alpha, typename TTypes::Tensor backprops); \ + typename TTypes::ConstTensor features, T alpha, \ + typename TTypes::Tensor backprops); \ extern template struct LeakyReluGrad; \ \ template <> \ diff --git a/tensorflow/core/kernels/relu_op.h 
b/tensorflow/core/kernels/relu_op.h index c55190065c..fa79ab03ae 100644 --- a/tensorflow/core/kernels/relu_op.h +++ b/tensorflow/core/kernels/relu_op.h @@ -143,8 +143,8 @@ class LeakyReluOp : public UnaryElementWiseOp> { void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { functor::LeakyRelu functor; - functor(context->eigen_device(), input.flat(), - alpha_, output->flat()); + functor(context->eigen_device(), input.flat(), alpha_, + output->flat()); } private: @@ -183,7 +183,9 @@ class LeakyReluGradOp template void LeakyReluGradOp::OperateNoTemplate(OpKernelContext* context, - const Tensor& g, const Tensor& a, T alpha, Tensor* output) { + const Tensor& g, + const Tensor& a, T alpha, + Tensor* output) { if (!ReluHelpers::ValidateSameSize(context, g, a)) return; functor::LeakyReluGrad functor; functor(context->eigen_device(), g.flat(), a.flat(), alpha, diff --git a/tensorflow/core/kernels/relu_op_functor.h b/tensorflow/core/kernels/relu_op_functor.h index 7f0951451d..548d5a277d 100644 --- a/tensorflow/core/kernels/relu_op_functor.h +++ b/tensorflow/core/kernels/relu_op_functor.h @@ -91,7 +91,6 @@ struct Relu6Grad { } }; - // Functor used by LeakyReluOp to do the computations. 
template struct LeakyRelu { diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 7066f28883..3e24b8a2c4 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -323,37 +323,37 @@ class LeakyReluTest(test.TestCase): def testGradGradFloat32(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - name="x") - y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float32, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + name="x") + y = nn_ops.leaky_relu(x, alpha=0.1, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float32, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float32) gradient of gradient err = ", err) self.assertLess(err, 1e-4) def testGradGradFloat64(self): with compat.forward_compatibility_horizon(2018, 10, 2): with self.test_session(): - x = constant_op.constant( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - shape=[2, 5], - dtype=dtypes.float64, - name="x") - y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") - z = gradients_impl.gradients(y, x) - x_init = np.asarray( - [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], - dtype=np.float64, - order="F") - err = gradient_checker.compute_gradient_error( - x, [2, 5], z[0], [2, 5], x_init_value=x_init) + x = constant_op.constant( + [-0.9, -0.7, 
-0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + shape=[2, 5], + dtype=dtypes.float64, + name="x") + y = nn_ops.leaky_relu(x, alpha=0.02, name="leaky_relu") + z = gradients_impl.gradients(y, x) + x_init = np.asarray( + [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]], + dtype=np.float64, + order="F") + err = gradient_checker.compute_gradient_error( + x, [2, 5], z[0], [2, 5], x_init_value=x_init) print("leaky_relu (float64) gradient of gradient err = ", err) self.assertLess(err, 1e-10) diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 4de662fe33..9e8d320f06 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,6 +1324,10 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "leaky_relu" + argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " + } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Mon, 3 Sep 2018 12:10:51 +0800 Subject: [PATCH 013/570] Add XLA support for LeakyReluOp. 
Code contributed by: Meng Chen --- tensorflow/compiler/tests/binary_ops_test.py | 7 ++++ tensorflow/compiler/tests/unary_ops_test.py | 5 +++ tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 0aafda7fb4..8941dd4e27 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,6 +178,13 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) + self._testBinary( + gen_nn_ops._leaky_relu_grad, + np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), + np.array( + [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index 73adb0d243..91f876fa23 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,6 +361,11 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + nn_ops.leaky_relu, + np.array([[-1.0, 1.0]], dtype=dtype), + expected=np.array([[-0.2, 1.0]], dtype=dtype)) + self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index d35777ccb1..ec14735884 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,6 +50,24 @@ class Relu6Op : public 
XlaOpKernel { } }; + +class LeakyReluOp : public XlaOpKernel { + public: + explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Compute the max of the input x and alpha*x. + void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* builder = ctx->builder(); + auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + } + private: + float alpha_; +}; + class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -84,10 +102,34 @@ class Relu6GradOp : public XlaOpKernel { } }; +class LeakyReluGradOp : public XlaOpKernel { + public: + explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); + } + // Return the lhs (incoming gradient) if the rhs (input feature) > 0, + // otherwise return the alpha * lhs. 
+ void Compile(XlaOpKernelContext* ctx) override { + xla::XlaBuilder* b = ctx->builder(); + const TensorShape shape = ctx->InputShape(0); + const auto zero = + xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); + const auto pred = xla::Gt(ctx->Input(1), zero); + auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), + static_cast(alpha_)); + ctx->SetOutput(0, + xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + } + private: + float alpha_; +}; + REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); +REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); +REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From fa20b59b920233d35bb8da3fbc3c234c369a8291 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 14:20:40 -0700 Subject: [PATCH 014/570] Move CUDA-specific occupancy calculation into proper file -Maintain functionality, just move CalculateOccupancy() and CompareOccupancy() methods from device_description to cuda_gpu_executor -Remove CUDA requirement in general class device_description --- .../stream_executor/cuda/cuda_gpu_executor.cc | 37 +++++++++++++++++++ .../stream_executor/cuda/cuda_gpu_executor.h | 11 ++++++ .../stream_executor/device_description.cc | 32 ---------------- .../stream_executor/device_description.h | 17 --------- 4 files changed, 48 insertions(+), 49 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 39b0696c93..458c0e3030 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -490,6 +490,43 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, } } +// Compute and return maximum blocks per core (occupancy) based on the +// device description, some kernel 
characteristics and the number of threads per +// block. If unable to compute occupancy, zero is returned. +int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + return suggested_blocks; +} + +// Compute and return the suggested thread count to acheive ideal occupancy. +// If the provided thread dimensions match this number, zero is returned. +int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func) { + int suggested_blocks = 0; + int suggested_threads = 0; + CUresult err = + cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, + func, NULL, shared_memory_per_block, 0); + CHECK_EQ(err, CUDA_SUCCESS); + if (suggested_blocks > *initial_blocks) { + *initial_blocks = suggested_blocks; + return suggested_threads; + } else { + return 0; + } +} + void *CUDAExecutor::Allocate(uint64 size) { return CUDADriver::DeviceAllocate(context_, size); } diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index 8a954d5461..e8ebbc3220 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -70,6 +70,17 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const BlockDim &block_dims, const KernelBase &k, const KernelArgsArrayBase &args) override; + int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + 
+ int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + void *Allocate(uint64 size) override; void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, diff --git a/tensorflow/stream_executor/device_description.cc b/tensorflow/stream_executor/device_description.cc index df52ce6cce..726c4adf74 100644 --- a/tensorflow/stream_executor/device_description.cc +++ b/tensorflow/stream_executor/device_description.cc @@ -157,36 +157,4 @@ static uint64 RoundDown(uint64 value, uint64 n) { return port::MathUtil::FloorOfRatio(value, n) * n; } -int CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - return suggested_blocks; -} - -int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { - int suggested_blocks = 0; - int suggested_threads = 0; - CUresult err = - cuOccupancyMaxPotentialBlockSize(&suggested_blocks, &suggested_threads, - func, NULL, shared_memory_per_block, 0); - CHECK_EQ(err, CUDA_SUCCESS); - if (suggested_blocks > *initial_blocks) { - *initial_blocks = suggested_blocks; - return suggested_threads; - } else { - return 0; - } -} - } // namespace stream_executor diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h index d335b9b875..b15ce31216 100644 --- a/tensorflow/stream_executor/device_description.h +++ b/tensorflow/stream_executor/device_description.h @@ 
-24,7 +24,6 @@ limitations under the License. #include #include "tensorflow/stream_executor/platform/port.h" -#include "tensorflow/stream_executor/cuda/cuda_driver.h" #include "tensorflow/stream_executor/launch_dim.h" #include "tensorflow/stream_executor/platform/port.h" @@ -324,22 +323,6 @@ void CalculateDimensionality(const DeviceDescription &device_description, uint64 element_count, uint64 *threads_per_block, uint64 *block_count); -// Compute and return maximum blocks per core (occupancy) based on the -// device description, some kernel characteristics and the number of threads per -// block. If unable to compute occupancy, zero is returned. -int CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - -// Compute and return the suggested thread count to acheive ideal occupancy. -// If the provided thread dimensions match this number, zero is returned. -int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - } // namespace stream_executor #endif // TENSORFLOW_STREAM_EXECUTOR_DEVICE_DESCRIPTION_H_ -- GitLab From cd6597b8fcd82b51ddb47a297972a1614c2a5d78 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 16:17:40 -0700 Subject: [PATCH 015/570] Fixed transition typo --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 458c0e3030..a961e9a6c4 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -493,7 +493,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // Compute and return maximum blocks per core 
(occupancy) based on the // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. -int CalculateOccupancy(const DeviceDescription& device_description, +int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, const ThreadDim& thread_dims, CUfunction func) { @@ -508,7 +508,7 @@ int CalculateOccupancy(const DeviceDescription& device_description, // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. -int CompareOccupancy(int* initial_blocks, +int CUDAExecutor::CompareOccupancy(int* initial_blocks, const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, -- GitLab From 475b7715f16ad0f94fa9986a0eefc1b2cf2044bd Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Tue, 4 Sep 2018 16:31:01 -0700 Subject: [PATCH 016/570] Recommended typo fix --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index a961e9a6c4..ce2f1ce3ae 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -483,7 +483,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, CompareOccupancy(&blocks_per_sm, device_description, regs_per_thread, smem_per_block, thread_dims, cufunc); if (suggested_threads != 0) { - VLOG(2) << "The cuda occupancy calculator reccommends using " + VLOG(2) << "The cuda occupancy calculator recommends using " << suggested_threads << " threads per block to acheive an occupancy of " << blocks_per_sm << " blocks per SM."; -- GitLab From a95281ce1b449d8f92a3799ff9c1dbf661b70bc4 Mon Sep 17 00:00:00 2001 From: Cao Zongyan 
Date: Wed, 5 Sep 2018 09:02:40 +0800 Subject: [PATCH 017/570] Avoid golden API file changing. --- tensorflow/cc/gradients/nn_grad_test.cc | 3 +-- tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt | 1 + tensorflow/tools/api/golden/v1/tensorflow.pbtxt | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index d8c2a1a0fc..f5a09e09dc 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -42,7 +42,6 @@ using ops::MaxPoolV2; using ops::Placeholder; using ops::Relu; using ops::Relu6; -using ops::LeakyRelu; using ops::Selu; using ops::Softmax; using ops::Softplus; @@ -165,7 +164,7 @@ TEST_F(NNGradTest, Relu6Grad) { TEST_F(NNGradTest, LeakyReluGrad) { TensorShape shape({5, 2}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape)); - auto y = LeakyRelu(scope_, x); + auto y = ops::internal::LeakyRelu(scope_, x); // Avoid input values where Leaky ReLU gradient is not well defined (around // zero). Tensor x_init_value = test::AsTensor( diff --git a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt index 4a61889f54..280148e032 100644 --- a/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LeakyRelu.pbtxt @@ -1,4 +1,5 @@ op { graph_op_name: "LeakyRelu" + visibility: HIDDEN summary: "Computes rectified linear: `max(features, features * alpha)`." 
} diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 9e8d320f06..4de662fe33 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -1324,10 +1324,6 @@ tf_module { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "leaky_relu" - argspec: "args=[\'features\', \'alpha\', \'name\'], varargs=None, keywords=None, defaults=[\'0.2\', \'None\'], " - } member_method { name: "less" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 69d3b8faf41791834301a74a05e288964940427d Mon Sep 17 00:00:00 2001 From: "Wen-Heng (Jack) Chung" Date: Fri, 22 Jun 2018 23:09:43 -0500 Subject: [PATCH 018/570] [ROCm] bazel build system and continuous integration logic The commit contains following components to support TensorFlow on ROCm platform - bazel build system - continuous integration logic Authors: - Jack Chung: jack.chung@amd.com - Jeffrey Poznanovic: Jeffrey.Poznanovic@amd.com - Peng Sun: Peng.Sun@amd.com --- configure.py | 20 + tensorflow/core/BUILD | 4 +- tensorflow/core/kernels/BUILD | 3 +- tensorflow/tensorflow.bzl | 67 +- tensorflow/tools/ci_build/Dockerfile.rocm | 97 +++ .../tools/ci_build/builds/docker_test.sh | 9 +- tensorflow/tools/ci_build/builds/pip.sh | 4 +- .../tools/ci_build/builds/with_the_same_user | 6 + tensorflow/tools/ci_build/ci_build.sh | 11 +- .../tools/ci_build/linux/cpu/run_cc_core.sh | 1 + .../tools/ci_build/linux/cpu/run_py2_core.sh | 1 + .../ci_build/linux/cpu/run_py3_contrib.sh | 1 + .../tools/ci_build/linux/cpu/run_py3_core.sh | 1 + .../tools/ci_build/linux/libtensorflow.sh | 3 + .../tools/ci_build/linux/libtensorflow_cpu.sh | 1 + .../ci_build/linux/libtensorflow_docker.sh | 6 + .../ci_build/linux/libtensorflow_rocm.sh | 22 + .../tools/ci_build/linux/rocm/run_cc_core.sh | 39 ++ 
.../tools/ci_build/linux/rocm/run_py3_core.sh | 39 ++ .../tools/ci_build/osx/cpu/run_py2_cc_core.sh | 1 + .../tools/ci_build/osx/libtensorflow_cpu.sh | 1 + .../tools/ci_build/osx/libtensorflow_gpu.sh | 1 + .../tools/ci_build/osx/libtensorflow_rocm.sh | 36 + .../tools/ci_build/xla/linux/rocm/run_py3.sh | 41 ++ tensorflow/workspace.bzl | 2 + .../gpus/crosstool/CROSSTOOL_hipcc.tpl | 158 +++++ .../bin/crosstool_wrapper_driver_rocm.tpl | 241 +++++++ third_party/gpus/rocm/BUILD | 0 third_party/gpus/rocm/BUILD.tpl | 99 +++ third_party/gpus/rocm/build_defs.bzl.tpl | 32 + third_party/gpus/rocm/rocm_config.h.tpl | 21 + third_party/gpus/rocm_configure.bzl | 663 ++++++++++++++++++ tools/bazel.rc | 3 + 33 files changed, 1611 insertions(+), 23 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rocm create mode 100755 tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh create mode 100755 tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh create mode 100755 tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh create mode 100755 tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh create mode 100644 third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl create mode 100755 third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl create mode 100644 third_party/gpus/rocm/BUILD create mode 100644 third_party/gpus/rocm/BUILD.tpl create mode 100644 third_party/gpus/rocm/build_defs.bzl.tpl create mode 100644 third_party/gpus/rocm/rocm_config.h.tpl create mode 100644 third_party/gpus/rocm_configure.bzl diff --git a/configure.py b/configure.py index 361bd4764d..4f998511aa 100644 --- a/configure.py +++ b/configure.py @@ -1521,6 +1521,13 @@ def main(): else: set_trisycl_include_dir(environ_cp) + set_action_env_var(environ_cp, 'TF_NEED_ROCM', 'ROCm', False) + if (environ_cp.get('TF_NEED_ROCM') == '1' and + 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get( + 'LD_LIBRARY_PATH') != '1'): + 
write_action_env_to_bazelrc('LD_LIBRARY_PATH', + environ_cp.get('LD_LIBRARY_PATH')) + set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False) if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): @@ -1561,6 +1568,19 @@ def main(): write_to_bazelrc('build --config=download_clang') write_to_bazelrc('test --config=download_clang') + # SYCL / ROCm / CUDA are mutually exclusive. + # At most 1 GPU platform can be configured. + gpu_platform_count = 0 + if environ_cp.get('TF_NEED_OPENCL_SYCL') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_ROCM') == '1': + gpu_platform_count += 1 + if environ_cp.get('TF_NEED_CUDA') == '1': + gpu_platform_count += 1 + if gpu_platform_count >= 2: + raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. ' + 'At most 1 GPU platform can be configured.') + set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False) if environ_cp.get('TF_NEED_MPI') == '1': set_mpi_home(environ_cp) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index c06fea130f..d5dfb8c813 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -146,7 +146,7 @@ load( "if_static", "tf_cuda_tests_tags", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") load("@io_bazel_rules_closure//closure:defs.bzl", "closure_proto_library") load( "//third_party/mkl:build_defs.bzl", @@ -2941,7 +2941,7 @@ tf_cuda_library( "platform/device_tracer.h", ], copts = tf_copts(), - cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(), + cuda_deps = if_cuda_is_configured(tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps()), visibility = ["//visibility:private"], deps = [ ":core_cpu_internal", diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 25063ac823..68fa8fa481 100644 --- a/tensorflow/core/kernels/BUILD +++ 
b/tensorflow/core/kernels/BUILD @@ -55,7 +55,8 @@ load( "if_mkl_ml", "mkl_deps", ) -load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda", "if_cuda_is_configured") +load("@local_config_rocm//rocm:build_defs.bzl", "if_rocm", "if_rocm_is_configured") config_setting( # Add "--define tensorflow_xsmm=1" to your build command to use libxsmm for diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index adac895a17..f51a628ca3 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -17,8 +17,15 @@ load( ) load( "@local_config_cuda//cuda:build_defs.bzl", - "cuda_default_copts", "if_cuda", + "if_cuda_is_configured", + "cuda_default_copts", +) +load( + "@local_config_rocm//rocm:build_defs.bzl", + "if_rocm", + "if_rocm_is_configured", + "rocm_default_copts", ) load( "//third_party/mkl:build_defs.bzl", @@ -860,12 +867,14 @@ def tf_cuda_only_cc_test( srcs = srcs + tf_binary_additional_srcs(), size = size, args = args, - copts = _cuda_copts() + tf_copts(), + copts = _cuda_copts() + _rocm_copts() + tf_copts(), data = data + tf_binary_dynamic_kernel_dsos(kernels), - deps = deps + tf_binary_dynamic_kernel_deps(kernels) + if_cuda([ - clean_dep("//tensorflow/core:cuda"), - clean_dep("//tensorflow/core:gpu_lib"), - ]), + deps = deps + tf_binary_dynamic_kernel_deps(kernels) + + if_cuda_is_configured([ + clean_dep("//tensorflow/core:cuda"), + clean_dep("//tensorflow/core:gpu_lib")]) + + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib")]), linkopts = if_not_windows(["-lpthread", "-lm"]) + linkopts + _rpath_linkopts(name), linkstatic = linkstatic or select({ # cc_tests with ".so"s in srcs incorrectly link on Darwin @@ -1000,7 +1009,7 @@ register_extension_info( label_regex_for_dep = "{extension_name}", ) -def _cuda_copts(): +def _cuda_copts(opts = []): """Gets the appropriate set of copts for (maybe) CUDA compilation. 
If we're doing CUDA compilation, returns copts for our particular CUDA @@ -1016,13 +1025,31 @@ def _cuda_copts(): "@local_config_cuda//cuda:using_clang": ([ "-fcuda-flush-denormals-to-zero", ]), - }) + }) + if_cuda_is_configured(opts) + +def _rocm_copts(opts = []): + """Gets the appropriate set of copts for (maybe) ROCm compilation. + + If we're doing ROCm compilation, returns copts for our particular ROCm + compiler. If we're not doing ROCm compilation, returns an empty list. + + """ + return rocm_default_copts() + select({ + "//conditions:default": [], + "@local_config_rocm//rocm:using_hipcc": ([ + "", + ]) + }) + if_rocm_is_configured(opts) # Build defs for TensorFlow kernels # When this target is built using --config=cuda, a cc_library is built # that passes -DGOOGLE_CUDA=1 and '-x cuda', linking in additional # libraries needed by GPU kernels. +# +# When this target is built using --config=rocm, a cc_library is built +# that passes -DTENSORFLOW_USE_ROCM and '-x rocm', linking in additional +# libraries needed by GPU kernels. 
def tf_gpu_kernel_library( srcs, copts = [], @@ -1030,16 +1057,18 @@ def tf_gpu_kernel_library( deps = [], hdrs = [], **kwargs): - copts = copts + _cuda_copts() + if_cuda(cuda_copts) + tf_copts() + copts = copts + tf_copts() + _cuda_copts(opts = cuda_copts) + _rocm_copts(opts = cuda_copts) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( srcs = srcs, hdrs = hdrs, copts = copts, - deps = deps + if_cuda([ + deps = deps + if_cuda_is_configured([ clean_dep("//tensorflow/core:cuda"), clean_dep("//tensorflow/core:gpu_lib"), + ]) + if_rocm_is_configured([ + clean_dep("//tensorflow/core:gpu_lib"), ]), alwayslink = 1, **kwargs @@ -1075,11 +1104,13 @@ def tf_cuda_library(deps = None, cuda_deps = None, copts = tf_copts(), **kwargs) kwargs["features"] = kwargs.get("features", []) + ["-use_header_modules"] native.cc_library( - deps = deps + if_cuda(cuda_deps + [ + deps = deps + if_cuda_is_configured(cuda_deps + [ clean_dep("//tensorflow/core:cuda"), - "@local_config_cuda//cuda:cuda_headers", + "@local_config_cuda//cuda:cuda_headers" + ]) + if_rocm_is_configured(cuda_deps + [ + "@local_config_rocm//rocm:rocm_headers" ]), - copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_mkl(["-DINTEL_MKL=1"]) + + copts = (copts + if_cuda(["-DGOOGLE_CUDA=1"]) + if_rocm(["-DTENSORFLOW_USE_ROCM=1"]) + if_mkl(["-DINTEL_MKL=1"]) + if_mkl_open_source_only(["-DINTEL_MKL_DNN_ONLY"]) + if_tensorrt(["-DGOOGLE_TENSORRT=1"])), **kwargs @@ -1459,6 +1490,9 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ "@local_config_cuda//cuda:cuda_headers", "@local_config_cuda//cuda:cudart_static", ] + rocm_deps = [ + clean_dep("//tensorflow/core:stream_executor_headers_lib"), + ] deps = deps + tf_custom_op_library_additional_deps() if gpu_srcs: basename = name.split(".")[0] @@ -1467,13 +1501,14 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ srcs = gpu_srcs, copts = _cuda_copts() + 
if_tensorrt(["-DGOOGLE_TENSORRT=1"]), features = if_cuda(["-use_header_modules"]), - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps) ) cuda_deps.extend([":" + basename + "_gpu"]) + rocm_deps.extend([":" + basename + "_gpu"]) check_deps( name = name + "_check_deps", - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), disallowed_deps = [ clean_dep("//tensorflow/core:framework"), clean_dep("//tensorflow/core:lib"), @@ -1482,7 +1517,7 @@ def tf_custom_op_library(name, srcs = [], gpu_srcs = [], deps = [], linkopts = [ tf_cc_shared_object( name = name, srcs = srcs, - deps = deps + if_cuda(cuda_deps), + deps = deps + if_cuda_is_configured(cuda_deps) + if_rocm_is_configured(rocm_deps), data = if_static([name + "_check_deps"]), copts = tf_copts(is_external = True), features = ["windows_export_all_symbols"], diff --git a/tensorflow/tools/ci_build/Dockerfile.rocm b/tensorflow/tools/ci_build/Dockerfile.rocm new file mode 100644 index 0000000000..aadaa8bac1 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rocm @@ -0,0 +1,97 @@ +# This Dockerfile provides a starting point for a ROCm installation of +# MIOpen and tensorflow. 
+FROM ubuntu:xenial +MAINTAINER Jeff Poznanovic + +ARG DEB_ROCM_REPO=http://repo.radeon.com/rocm/apt/debian/ +ARG ROCM_PATH=/opt/rocm + +ENV DEBIAN_FRONTEND noninteractive +ENV TF_NEED_ROCM 1 +ENV HOME /root/ +RUN apt update && apt install -y wget software-properties-common + +# Add rocm repository +RUN apt-get clean all +RUN wget -qO - $DEB_ROCM_REPO/rocm.gpg.key | apt-key add - +RUN sh -c "echo deb [arch=amd64] $DEB_ROCM_REPO xenial main > /etc/apt/sources.list.d/rocm.list" + +# Install misc pkgs +RUN apt-get update --allow-insecure-repositories && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + clang-3.8 \ + clang-format-3.8 \ + clang-tidy-3.8 \ + cmake \ + cmake-qt-gui \ + ssh \ + curl \ + apt-utils \ + pkg-config \ + g++-multilib \ + git \ + libunwind-dev \ + libfftw3-dev \ + libelf-dev \ + libncurses5-dev \ + libpthread-stubs0-dev \ + vim \ + gfortran \ + libboost-program-options-dev \ + libssl-dev \ + libboost-dev \ + libboost-system-dev \ + libboost-filesystem-dev \ + rpm \ + libnuma-dev \ + virtualenv \ + python-pip \ + python3-pip \ + wget && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Install rocm pkgs +RUN apt-get update --allow-insecure-repositories && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ + rocm-dev rocm-libs rocm-utils \ + rocfft miopen-hip miopengemm rocblas hipblas rocrand \ + rocm-profiler cxlactivitylogger && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN cd ~ && git clone https://github.com/GPUOpen-ProfessionalCompute-Tools/HIP.git +RUN cd ~/HIP && mkdir -p build && cd build && cmake .. 
&& make package -j && dpkg -i *.deb + +ENV HCC_HOME=$ROCM_PATH/hcc +ENV HIP_PATH=$ROCM_PATH/hip +ENV OPENCL_ROOT=$ROCM_PATH/opencl +ENV PATH="$HCC_HOME/bin:$HIP_PATH/bin:${PATH}" +ENV PATH="$ROCM_PATH/bin:${PATH}" +ENV PATH="$OPENCL_ROOT/bin:${PATH}" + +# Add target file to help determine which device(s) to build for +RUN echo -e "gfx803\ngfx900" >> /opt/rocm/bin/target.lst + +# Setup environment variables, and add those environment variables at the end of ~/.bashrc +ARG HCC_HOME=/opt/rocm/hcc +ARG HIP_PATH=/opt/rocm/hip +ARG PATH=$HCC_HOME/bin:$HIP_PATH/bin:$PATH + +# Copy and run the install scripts. +COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_bazel.sh +RUN /install/install_golang.sh + +# Set up the master bazelrc configuration file. +COPY install/.bazelrc /etc/bazel.bazelrc + +# Configure the build for our CUDA configuration. 
+ENV TF_NEED_ROCM 1 + diff --git a/tensorflow/tools/ci_build/builds/docker_test.sh b/tensorflow/tools/ci_build/builds/docker_test.sh index e337ea4b05..38891b60e5 100755 --- a/tensorflow/tools/ci_build/builds/docker_test.sh +++ b/tensorflow/tools/ci_build/builds/docker_test.sh @@ -19,7 +19,7 @@ # # Usage: docker_test.sh # Arguments: -# IMAGE_TYPE : Type of the image: (CPU|GPU) +# IMAGE_TYPE : Type of the image: (CPU|GPU|ROCM) # TAG : Docker image tag # WHL_PATH : Path to the whl file to be installed inside the docker image # @@ -60,6 +60,8 @@ if [[ "${IMAGE_TYPE}" == "cpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile" elif [[ "${IMAGE_TYPE}" == "gpu" ]]; then DOCKERFILE="tensorflow/tools/docker/Dockerfile.gpu" +elif [[ "${IMAGE_TYPE}" == "rocm" ]]; then + DOCKERFILE="tensorflow/tools/docker/Dockerfile.rocm" else die "Unrecognized image type: $1" fi @@ -106,13 +108,16 @@ if [ "${IMAGE_TYPE}" == "gpu" ]; then devices=$(\ls /dev/nvidia* | xargs -I{} echo '--device {}:{}') libs=$(\ls /usr/lib/x86_64-linux-gnu/libcuda.* | xargs -I{} echo '-v {}:{}') GPU_EXTRA_PARAMS="${devices} ${libs}" +elif [ "${IMAGE_TYPE}" == "rocm" ]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" else GPU_EXTRA_PARAMS="" + ROCM_EXTRA_PARAMS="" fi # Run docker image with source directory mapped docker run -v ${BASE_DIR}:/tensorflow-src -w /tensorflow-src \ -${GPU_EXTRA_PARAMS} \ +${GPU_EXTRA_PARAMS} ${ROCM_EXTRA_PARAMS} \ "${DOCKER_IMG_TAG}" \ /bin/bash -c "tensorflow/tools/ci_build/builds/run_pip_tests.sh && "\ "tensorflow/tools/ci_build/builds/test_tutorials.sh && "\ diff --git a/tensorflow/tools/ci_build/builds/pip.sh b/tensorflow/tools/ci_build/builds/pip.sh index fef121ab5a..6543779022 100755 --- a/tensorflow/tools/ci_build/builds/pip.sh +++ b/tensorflow/tools/ci_build/builds/pip.sh @@ -132,6 +132,7 @@ echo "Using Bazel flags: ${BAZEL_FLAGS}" PIP_BUILD_TARGET="//tensorflow/tools/pip_package:build_pip_package" GPU_FLAG="" if [[ ${CONTAINER_TYPE} == 
"cpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]] || \ [[ ${CONTAINER_TYPE} == "debian.jessie.cpu" ]]; then bazel build ${BAZEL_FLAGS} ${PIP_BUILD_TARGET} || \ die "Build failed." @@ -255,7 +256,8 @@ if [[ $(uname) == "Linux" ]]; then die "ERROR: Cannot find repaired wheel." fi # Copy and rename for gpu manylinux as we do not want auditwheel to package in libcudart.so - elif [[ ${CONTAINER_TYPE} == "gpu" ]]; then + elif [[ ${CONTAINER_TYPE} == "gpu" ]] || \ + [[ ${CONTAINER_TYPE} == "rocm" ]]; then WHL_PATH=${AUDITED_WHL_NAME} cp ${WHL_DIR}/${WHL_BASE_NAME} ${WHL_PATH} echo "Copied manylinx1 wheel file at ${WHL_PATH}" diff --git a/tensorflow/tools/ci_build/builds/with_the_same_user b/tensorflow/tools/ci_build/builds/with_the_same_user index b216e3549f..1cc5aed15d 100755 --- a/tensorflow/tools/ci_build/builds/with_the_same_user +++ b/tensorflow/tools/ci_build/builds/with_the_same_user @@ -48,6 +48,12 @@ getent passwd "${CI_BUILD_UID}" || adduser ${ADDUSER_OPTS} \ usermod -a -G sudo "${CI_BUILD_USER}" echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo +if [[ "${TF_NEED_ROCM}" -eq 1 ]]; then + # ROCm requires the video group in order to use the GPU for compute. If it + # exists on the host, add it to the container. + getent group video || addgroup video && adduser "${CI_BUILD_USER}" video +fi + if [ -e /root/.bazelrc ]; then cp /root/.bazelrc "${CI_BUILD_HOME}/.bazelrc" chown "${CI_BUILD_UID}:${CI_BUILD_GID}" "${CI_BUILD_HOME}/.bazelrc" diff --git a/tensorflow/tools/ci_build/ci_build.sh b/tensorflow/tools/ci_build/ci_build.sh index 77265e0f50..eab0616513 100755 --- a/tensorflow/tools/ci_build/ci_build.sh +++ b/tensorflow/tools/ci_build/ci_build.sh @@ -18,7 +18,7 @@ # # # CONTAINER_TYPE: Type of the docker container used the run the build: -# e.g., (cpu | gpu | android | tensorboard) +# e.g., (cpu | gpu | rocm | android | tensorboard) # # DOCKERFILE_PATH: (Optional) Path to the Dockerfile used for docker build. 
# If this optional value is not supplied (via the @@ -103,6 +103,14 @@ if [[ "${CONTAINER_TYPE}" != gpu* ]]; then GPU_EXTRA_PARAMS="" fi +# Add extra params for rocm devices and libraries for ROCm container. +if [[ "${CONTAINER_TYPE}" == "rocm" ]]; then + ROCM_EXTRA_PARAMS="--device=/dev/kfd --device=/dev/dri --group-add video" +else + ROCM_EXTRA_PARAMS="" +fi + + # Determine the docker image name DOCKER_IMG_NAME="${BUILD_TAG}.${CONTAINER_TYPE}" @@ -159,6 +167,7 @@ ${DOCKER_BINARY} run --rm --pid=host \ -v ${WORKSPACE}:/workspace \ -w /workspace \ ${GPU_EXTRA_PARAMS} \ + ${ROCM_EXTRA_PARAMS} \ ${CI_DOCKER_EXTRA_PARAMS[@]} \ "${DOCKER_IMG_NAME}" \ ${CI_COMMAND_PREFIX[@]} \ diff --git a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh index 8eeddcdb82..3b5c92d148 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_cc_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' # Only running cc tests, python version does not matter. export PYTHON_BIN_PATH=`which python` diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh index 8eca1987f0..52eff6330f 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py2_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python2` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh index f6fa9251d4..d12027599a 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_contrib.sh @@ -26,6 +26,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh index 51eb2cd7e6..7c531a4d68 100755 --- a/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh +++ b/tensorflow/tools/ci_build/linux/cpu/run_py3_core.sh @@ -26,6 +26,7 @@ echo "" # Run configure. export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=`which python3` yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/linux/libtensorflow.sh b/tensorflow/tools/ci_build/linux/libtensorflow.sh index beef8e063b..3b6e15feb9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow.sh @@ -27,5 +27,8 @@ SUFFIX="-cpu-linux-" if [ "${TF_NEED_CUDA}" == "1" ]; then SUFFIX="-gpu-linux-" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + SUFFIX="-rocm-linux-" +fi build_libtensorflow_tarball "${SUFFIX}$(uname -m)" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh index 4bf34dd299..b76262b6e9 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_cpu.sh @@ -19,4 +19,5 @@ set -ex SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 "${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh index 60c974c36b..467b8dc808 100755 --- a/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh +++ b/tensorflow/tools/ci_build/linux/libtensorflow_docker.sh @@ -38,6 +38,11 @@ if [ "${TF_NEED_CUDA}" == "1" ]; then DOCKER_BINARY="nvidia-docker" DOCKER_FILE="Dockerfile.gpu" fi +if [ "${TF_NEED_ROCM}" == "1" ]; then + 
DOCKER_IMAGE="tf-tensorflow-rocm" + DOCKER_BINARY="docker" + DOCKER_FILE="Dockerfile.rocm" +fi docker build \ -t "${DOCKER_IMAGE}" \ @@ -53,6 +58,7 @@ ${DOCKER_BINARY} run \ -e "TF_NEED_HDFS=0" \ -e "TF_NEED_CUDA=${TF_NEED_CUDA}" \ -e "TF_NEED_TENSORRT=${TF_NEED_CUDA}" \ + -e "TF_NEED_ROCM=${TF_NEED_ROCM}" \ -e "TF_NEED_OPENCL_SYCL=0" \ "${DOCKER_IMAGE}" \ "/workspace/tensorflow/tools/ci_build/linux/libtensorflow.sh" diff --git a/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh new file mode 100755 index 0000000000..c1ebbe3630 --- /dev/null +++ b/tensorflow/tools/ci_build/linux/libtensorflow_rocm.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to build a binary releases of libtensorflow with GPU support. + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +export TF_NEED_ROCM=1 +"${SCRIPT_DIR}/libtensorflow_docker.sh" diff --git a/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh new file mode 100755 index 0000000000..200089f90e --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_cc_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=cc --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh new file mode 100755 index 0000000000..1d0b838c1b --- /dev/null +++ b/tensorflow/tools/ci_build/linux/rocm/run_py3_core.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` +export CC_OPT_FLAGS='-mavx' + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py + +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_oss,-oss_serial,-no_gpu,-benchmark-test -k \ + --test_lang_filters=py --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 --config=opt \ + //tensorflow/... -//tensorflow/compiler/... -//tensorflow/contrib/... diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh index c7cc16e669..adee0d3171 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh @@ -27,6 +27,7 @@ echo "" # Run configure. 
export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | $PYTHON_BIN_PATH configure.py diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh index 9ae5fc6bea..06798adc03 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_cpu.sh @@ -26,6 +26,7 @@ source "${SCRIPT_DIR}/../builds/libtensorflow.sh" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 export TF_NEED_CUDA=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh index d95fcdeb85..95f1992d7d 100755 --- a/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh +++ b/tensorflow/tools/ci_build/osx/libtensorflow_gpu.sh @@ -27,6 +27,7 @@ export TF_NEED_CUDA=1 export LD_LIBRARY_PATH="/usr/local/cuda/lib:/usr/local/cuda/extras/CUPTI/lib:${LD_LIBRARY_PATH}" export PYTHON_BIN_PATH="/usr/bin/python" export TF_NEED_HDFS=0 +export TF_NEED_ROCM=0 export TF_NEED_OPENCL_SYCL=0 export TF_NEED_MKL=0 export COMPUTECPP_PATH="/usr/local" diff --git a/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh new file mode 100755 index 0000000000..aeabc0e39e --- /dev/null +++ b/tensorflow/tools/ci_build/osx/libtensorflow_rocm.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# +# Script to produce binary release of libtensorflow (C API, Java jars etc.). + +set -ex +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +# See comments at the top of this file for details. +source "${SCRIPT_DIR}/../builds/libtensorflow.sh" + +# Configure script +export TF_NEED_ROCM=1 +export PYTHON_BIN_PATH="/usr/bin/python" +export TF_NEED_GCP=0 +export TF_NEED_HDFS=0 +export TF_NEED_CUDA=0 +export TF_NEED_OPENCL_SYCL=0 +export TF_NEED_MKL=0 +export COMPUTECPP_PATH="/usr/local" + +export PATH="/usr/local/cuda/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin" +build_libtensorflow_tarball "-gpu-darwin-$(uname -m)" diff --git a/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh new file mode 100755 index 0000000000..a0de128020 --- /dev/null +++ b/tensorflow/tools/ci_build/xla/linux/rocm/run_py3.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# ============================================================================== + +set -e +set -x + +N_JOBS=$(grep -c ^processor /proc/cpuinfo) + +echo "" +echo "Bazel will use ${N_JOBS} concurrent job(s)." +echo "" + +# Run configure. +export PYTHON_BIN_PATH=`which python3` + +export TF_NEED_ROCM=1 + +yes "" | $PYTHON_BIN_PATH configure.py +echo "build --distinct_host_configuration=false" >> .tf_configure.bazelrc + +bazel clean +# Run bazel test command. Double test timeouts to avoid flakes. +bazel test --config=rocm --test_tag_filters=-no_gpu,-benchmark-test,-no_oss -k \ + --jobs=${N_JOBS} --test_timeout 300,450,1200,3600 \ + --build_tests_only --test_output=errors --local_test_jobs=1 \ + --config=xla -- \ + //tensorflow/compiler/... diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1e7c5d6790..87d1243563 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -1,6 +1,7 @@ # TensorFlow external dependencies that can be loaded in WORKSPACE files. 
load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "tensorrt_configure") load("//third_party:nccl/nccl_configure.bzl", "nccl_configure") load("//third_party/mkl:build_defs.bzl", "mkl_repository") @@ -43,6 +44,7 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): sycl_configure(name = "local_config_sycl") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") + rocm_configure(name="local_config_rocm") initialize_third_party() diff --git a/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl new file mode 100644 index 0000000000..0e175b3ef6 --- /dev/null +++ b/third_party/gpus/crosstool/CROSSTOOL_hipcc.tpl @@ -0,0 +1,158 @@ +major_version: "local" +minor_version: "" +default_target_cpu: "same_as_host" + +default_toolchain { + cpu: "k8" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "piii" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "arm" + toolchain_identifier: "local_linux" +} +default_toolchain { + cpu: "ppc" + toolchain_identifier: "local_linux" +} + +toolchain { + abi_version: "local" + abi_libc_version: "local" + builtin_sysroot: "" + compiler: "compiler" + host_system_name: "local" + needsPic: true + supports_gold_linker: false + supports_incremental_linker: false + supports_fission: false + supports_interface_shared_objects: false + supports_normalizing_ar: false + supports_start_end_lib: false + supports_thin_archives: false + target_libc: "local" + target_cpu: "local" + target_system_name: "local" + toolchain_identifier: "local_linux" + + tool_path { name: "ar" path: "/usr/bin/ar" } + tool_path { name: "compat-ld" path: "/usr/bin/ld" } + tool_path { name: "cpp" path: "/usr/bin/cpp" } + tool_path { name: "dwp" path: "/usr/bin/dwp" } + # As part of the TensorFlow release, we place some 
ROCm-related compilation + # files in @local_config_rocm//crosstool/clang/bin, and this relative + # path, combined with the rest of our Bazel configuration causes our + # compilation to use those files. + tool_path { name: "gcc" path: "clang/bin/crosstool_wrapper_driver_rocm" } + # Use "-std=c++11" for hipcc. For consistency, force both the host compiler + # and the device compiler to use "-std=c++11". + cxx_flag: "-std=c++11" + linker_flag: "-Wl,-no-as-needed" + linker_flag: "-lstdc++" + #linker_flag: "-B/usr/bin/" + linker_flag: "-B/opt/rocm/hcc/compiler/bin" + +%{host_compiler_includes} + tool_path { name: "gcov" path: "/usr/bin/gcov" } + + # C(++) compiles invoke the compiler (as that is the one knowing where + # to find libraries), but we provide LD so other rules can invoke the linker. + tool_path { name: "ld" path: "/usr/bin/ld" } + + tool_path { name: "nm" path: "/usr/bin/nm" } + tool_path { name: "objcopy" path: "/usr/bin/objcopy" } + objcopy_embed_flag: "-I" + objcopy_embed_flag: "binary" + tool_path { name: "objdump" path: "/usr/bin/objdump" } + tool_path { name: "strip" path: "/usr/bin/strip" } + + # Anticipated future default. + unfiltered_cxx_flag: "-no-canonical-prefixes" + + # Make C++ compilation deterministic. Use linkstamping instead of these + # compiler symbols. + unfiltered_cxx_flag: "-Wno-builtin-macro-redefined" + unfiltered_cxx_flag: "-D__DATE__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIMESTAMP__=\"redacted\"" + unfiltered_cxx_flag: "-D__TIME__=\"redacted\"" + unfiltered_cxx_flag: "-D__HIP_PLATFORM_HCC__" + # The macro EIGEN_USE_HIP is used to tell Eigen to use the HIP platform headers + # It needs to be always set when compiling Eigen headers + # (irrespective of whether the source file is being compiled via HIPCC) + # so adding -DEIGEN_USE_HIP as a default CXX flag here + unfiltered_cxx_flag: "-DEIGEN_USE_HIP" + + + # Security hardening on by default. + # Conservative choice; -D_FORTIFY_SOURCE=2 may be unsafe in some cases. 
+ # We need to undef it before redefining it as some distributions now have + # it enabled by default. + #compiler_flag: "-U_FORTIFY_SOURCE" + #compiler_flag: "-D_FORTIFY_SOURCE=1" + #compiler_flag: "-fstack-protector" + #compiler_flag: "-fPIE" + #linker_flag: "-pie" + #linker_flag: "-Wl,-z,relro,-z,now" + + # Enable coloring even if there's no attached terminal. Bazel removes the + # escape sequences if --nocolor is specified. This isn't supported by gcc + # on Ubuntu 14.04. + # compiler_flag: "-fcolor-diagnostics" + + # All warnings are enabled. Maybe enable -Werror as well? + compiler_flag: "-Wall" + # Enable a few more warnings that aren't part of -Wall. + compiler_flag: "-Wunused-but-set-parameter" + # But disable some that are problematic. + compiler_flag: "-Wno-free-nonheap-object" # has false positives + + # Keep stack frames for debugging, even in opt mode. + compiler_flag: "-fno-omit-frame-pointer" + + # Anticipated future default. + linker_flag: "-no-canonical-prefixes" + unfiltered_cxx_flag: "-fno-canonical-system-headers" + # Have gcc return the exit code from ld. + linker_flag: "-pass-exit-codes" + # Stamp the binary with a unique identifier. + linker_flag: "-Wl,--build-id=md5" + linker_flag: "-Wl,--hash-style=gnu" + # Gold linker only? Can we enable this by default? + # linker_flag: "-Wl,--warn-execstack" + # linker_flag: "-Wl,--detect-odr-violations" + + # Include directory for ROCm headers. +%{rocm_include_path} + + compilation_mode_flags { + mode: DBG + # Enable debug symbols. + compiler_flag: "-g" + } + compilation_mode_flags { + mode: OPT + + # No debug symbols. + # Maybe we should enable https://gcc.gnu.org/wiki/DebugFission for opt or + # even generally? However, that can't happen here, as it requires special + # handling in Bazel. + compiler_flag: "-g0" + + # Conservative choice for -O + # -O3 can increase binary size and even slow down the resulting binaries. + # Profile first and / or use FDO if you need better performance than this. 
+ compiler_flag: "-O2" + + # Disable assertions + compiler_flag: "-DNDEBUG" + + # Removal of unused code and data at link time (can this increase binary size in some cases?). + compiler_flag: "-ffunction-sections" + compiler_flag: "-fdata-sections" + linker_flag: "-Wl,--gc-sections" + } + linking_mode_flags { mode: DYNAMIC } +} diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl new file mode 100755 index 0000000000..824238022b --- /dev/null +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl @@ -0,0 +1,241 @@ +#!/usr/bin/env python +"""Crosstool wrapper for compiling ROCm programs. + +SYNOPSIS: + crosstool_wrapper_driver_rocm [options passed in by cc_library() + or cc_binary() rule] + +DESCRIPTION: + This script is expected to be called by the cc_library() or cc_binary() bazel + rules. When the option "-x rocm" is present in the list of arguments passed + to this script, it invokes the hipcc compiler. Most arguments are passed + as is as a string to --compiler-options of hipcc. When "-x rocm" is not + present, this wrapper invokes gcc with the input arguments as is. +""" + +from __future__ import print_function + +__author__ = 'whchung@gmail.com (Wen-Heng (Jack) Chung)' + +from argparse import ArgumentParser +import os +import subprocess +import re +import sys +import pipes + +# Template values set by rocm_configure.bzl. +CPU_COMPILER = ('%{cpu_compiler}') +GCC_HOST_COMPILER_PATH = ('%{gcc_host_compiler_path}') + +HIPCC_PATH = '%{hipcc_path}' +PREFIX_DIR = os.path.dirname(GCC_HOST_COMPILER_PATH) + +def Log(s): + print('gpus/crosstool: {0}'.format(s)) + + +def GetOptionValue(argv, option): + """Extract the list of values for option from the argv list. + + Args: + argv: A list of strings, possibly the argv passed to main(). + option: The option whose value to extract, without the leading '-'. 
+ + Returns: + A list of values, either directly following the option, + (eg., -opt val1 val2) or values collected from multiple occurrences of + the option (eg., -opt val1 -opt val2). + """ + + parser = ArgumentParser() + parser.add_argument('-' + option, nargs='*', action='append') + args, _ = parser.parse_known_args(argv) + if not args or not vars(args)[option]: + return [] + else: + return sum(vars(args)[option], []) + + +def GetHostCompilerOptions(argv): + """Collect the -isystem, -iquote, and --sysroot option values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be used as the --compiler-options to hipcc. + """ + + parser = ArgumentParser() + parser.add_argument('-isystem', nargs='*', action='append') + parser.add_argument('-iquote', nargs='*', action='append') + parser.add_argument('--sysroot', nargs=1) + parser.add_argument('-g', nargs='*', action='append') + parser.add_argument('-fno-canonical-system-headers', action='store_true') + + args, _ = parser.parse_known_args(argv) + + opts = '' + + if args.isystem: + opts += ' -isystem ' + ' -isystem '.join(sum(args.isystem, [])) + if args.iquote: + opts += ' -iquote ' + ' -iquote '.join(sum(args.iquote, [])) + if args.g: + opts += ' -g' + ' -g'.join(sum(args.g, [])) + #if args.fno_canonical_system_headers: + # opts += ' -fno-canonical-system-headers' + if args.sysroot: + opts += ' --sysroot ' + args.sysroot[0] + + return opts + +def GetHipccOptions(argv): + """Collect the -hipcc_options values from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + + Returns: + The string that can be passed directly to hipcc. 
+ """ + + parser = ArgumentParser() + parser.add_argument('-hipcc_options', nargs='*', action='append') + + args, _ = parser.parse_known_args(argv) + + if args.hipcc_options: + options = _update_options(sum(args.hipcc_options, [])) + return ' '.join(['--'+a for a in options]) + return '' + + +def InvokeHipcc(argv, log=False): + """Call hipcc with arguments assembled from argv. + + Args: + argv: A list of strings, possibly the argv passed to main(). + log: True if logging is requested. + + Returns: + The return value of calling os.system('hipcc ' + args) + """ + + host_compiler_options = GetHostCompilerOptions(argv) + hipcc_compiler_options = GetHipccOptions(argv) + opt_option = GetOptionValue(argv, 'O') + m_options = GetOptionValue(argv, 'm') + m_options = ''.join([' -m' + m for m in m_options if m in ['32', '64']]) + include_options = GetOptionValue(argv, 'I') + out_file = GetOptionValue(argv, 'o') + depfiles = GetOptionValue(argv, 'MF') + defines = GetOptionValue(argv, 'D') + defines = ''.join([' -D' + define for define in defines]) + undefines = GetOptionValue(argv, 'U') + undefines = ''.join([' -U' + define for define in undefines]) + std_options = GetOptionValue(argv, 'std') + hipcc_allowed_std_options = ["c++11"] + std_options = ''.join([' -std=' + define + for define in std_options if define in hipcc_allowed_std_options]) + + # The list of source files get passed after the -c option. I don't know of + # any other reliable way to just get the list of source files to be compiled. + src_files = GetOptionValue(argv, 'c') + + if len(src_files) == 0: + return 1 + if len(out_file) != 1: + return 1 + + opt = (' -O2' if (len(opt_option) > 0 and int(opt_option[0]) > 0) + else ' -g') + + includes = (' -I ' + ' -I '.join(include_options) + if len(include_options) > 0 + else '') + + # Unfortunately, there are other options that have -c prefix too. + # So allowing only those look like C/C++ files. 
+ src_files = [f for f in src_files if + re.search('\.cpp$|\.cc$|\.c$|\.cxx$|\.C$', f)] + srcs = ' '.join(src_files) + out = ' -o ' + out_file[0] + + hipccopts = ' ' + hipccopts += ' ' + hipcc_compiler_options + hipccopts += undefines + hipccopts += defines + hipccopts += std_options + hipccopts += m_options + + if depfiles: + # Generate the dependency file + depfile = depfiles[0] + cmd = (HIPCC_PATH + ' ' + hipccopts + + host_compiler_options + + ' ' + GCC_HOST_COMPILER_PATH + + ' -I .' + includes + ' ' + srcs + ' -M -o ' + depfile) + if log: Log(cmd) + exit_status = os.system(cmd) + if exit_status != 0: + return exit_status + + cmd = (HIPCC_PATH + ' ' + hipccopts + + host_compiler_options + ' -fPIC' + + ' ' + GCC_HOST_COMPILER_PATH + + ' -I .' + opt + includes + ' -c ' + srcs + out) + + # TODO(zhengxq): for some reason, 'gcc' needs this help to find 'as'. + # Need to investigate and fix. + cmd = 'PATH=' + PREFIX_DIR + ':$PATH ' + cmd + if log: Log(cmd) + return os.system(cmd) + + +def main(): + # ignore PWD env var + os.environ['PWD']='' + + parser = ArgumentParser() + parser.add_argument('-x', nargs=1) + parser.add_argument('--rocm_log', action='store_true') + parser.add_argument('-pass-exit-codes', action='store_true') + args, leftover = parser.parse_known_args(sys.argv[1:]) + + if args.x and args.x[0] == 'rocm': + if args.rocm_log: Log('-x rocm') + leftover = [pipes.quote(s) for s in leftover] + if args.rocm_log: Log('using hipcc') + return InvokeHipcc(leftover, log=args.rocm_log) + + # XXX use hipcc to link + if args.pass_exit_codes: + gpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('-pass-exit-codes'))] + + # special handling for $ORIGIN + # - guard every argument with '' + modified_gpu_compiler_flags = [] + for flag in gpu_compiler_flags: + modified_gpu_compiler_flags.append("'" + flag + "'") + + if args.rocm_log: Log('Link with hipcc: %s' % (' '.join([HIPCC_PATH] + modified_gpu_compiler_flags))) + return 
subprocess.call([HIPCC_PATH] + modified_gpu_compiler_flags) + + # Strip our flags before passing through to the CPU compiler for files which + # are not -x rocm. We can't just pass 'leftover' because it also strips -x. + # We not only want to pass -x to the CPU compiler, but also keep it in its + # relative location in the argv list (the compiler is actually sensitive to + # this). + cpu_compiler_flags = [flag for flag in sys.argv[1:] + if not flag.startswith(('--rocm_log'))] + + # XXX: SE codes need to be built with gcc, but need this macro defined + cpu_compiler_flags.append("-D__HIP_PLATFORM_HCC__") + + return subprocess.call([CPU_COMPILER] + cpu_compiler_flags) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/third_party/gpus/rocm/BUILD b/third_party/gpus/rocm/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/gpus/rocm/BUILD.tpl b/third_party/gpus/rocm/BUILD.tpl new file mode 100644 index 0000000000..8258bb3589 --- /dev/null +++ b/third_party/gpus/rocm/BUILD.tpl @@ -0,0 +1,99 @@ +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "using_hipcc", + values = { + "define": "using_rocm_hipcc=true", + }, +) + +cc_library( + name = "rocm_headers", + hdrs = [ + "rocm/rocm_config.h", + %{rocm_headers} + ], + includes = [ + ".", + "rocm/include", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "hip", + srcs = ["rocm/lib/%{hip_lib}"], + data = ["rocm/lib/%{hip_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocblas", + srcs = ["rocm/lib/%{rocblas_lib}"], + data = ["rocm/lib/%{rocblas_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocfft", + srcs = ["rocm/lib/%{rocfft_lib}"], + data = ["rocm/lib/%{rocfft_lib}"], + includes = [ 
+ ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "hiprand", + srcs = ["rocm/lib/%{hiprand_lib}"], + data = ["rocm/lib/%{hiprand_lib}"], + includes = [ + ".", + "rocm/include", + "rocm/include/rocrand", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "miopen", + srcs = ["rocm/lib/%{miopen_lib}"], + data = ["rocm/lib/%{miopen_lib}"], + includes = [ + ".", + "rocm/include", + ], + linkstatic = 1, + visibility = ["//visibility:public"], +) + +cc_library( + name = "rocm", + visibility = ["//visibility:public"], + deps = [ + ":rocm_headers", + ":hip", + ":rocblas", + ":rocfft", + ":hiprand", + ":miopen", + ], +) + +%{rocm_include_genrules} diff --git a/third_party/gpus/rocm/build_defs.bzl.tpl b/third_party/gpus/rocm/build_defs.bzl.tpl new file mode 100644 index 0000000000..306f57551f --- /dev/null +++ b/third_party/gpus/rocm/build_defs.bzl.tpl @@ -0,0 +1,32 @@ +# Macros for building ROCm code. +def if_rocm(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with ROCm. + + Returns a select statement which evaluates to if_true if we're building + with ROCm enabled. Otherwise, the select statement evaluates to if_false. + + """ + return select({ + "@local_config_rocm//rocm:using_hipcc": if_true, + "//conditions:default": if_false + }) + + +def rocm_default_copts(): + """Default options for all ROCm compilations.""" + return if_rocm(["-x", "rocm"] + %{rocm_extra_copts}) + + +def rocm_is_configured(): + """Returns true if ROCm was enabled during the configure process.""" + return %{rocm_is_configured} + +def if_rocm_is_configured(x): + """Tests if the ROCm was enabled during the configure process. + + Unlike if_rocm(), this does not require that we are building with + --config=rocm. Used to allow non-ROCm code to depend on ROCm libraries. 
+ """ + if rocm_is_configured(): + return x + return [] diff --git a/third_party/gpus/rocm/rocm_config.h.tpl b/third_party/gpus/rocm/rocm_config.h.tpl new file mode 100644 index 0000000000..c5f25a845c --- /dev/null +++ b/third_party/gpus/rocm/rocm_config.h.tpl @@ -0,0 +1,21 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef ROCM_ROCM_CONFIG_H_ +#define ROCM_ROCM_CONFIG_H_ + +#define TF_ROCM_TOOLKIT_PATH "/opt/rocm" + +#endif // ROCM_ROCM_CONFIG_H_ diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl new file mode 100644 index 0000000000..9371e33f97 --- /dev/null +++ b/third_party/gpus/rocm_configure.bzl @@ -0,0 +1,663 @@ +# -*- Python -*- +"""Repository rule for ROCm autoconfiguration. + +`rocm_configure` depends on the following environment variables: + + * `TF_NEED_ROCM`: Whether to enable building with ROCm. + * `GCC_HOST_COMPILER_PATH`: The GCC host compiler path + * `ROCM_TOOLKIT_PATH`: The path to the ROCm toolkit. Default is + `/opt/rocm`. + * `TF_ROCM_VERSION`: The version of the ROCm toolkit. If this is blank, then + use the system default. + * `TF_MIOPEN_VERSION`: The version of the MIOpen library. + * `TF_ROCM_AMDGPU_TARGETS`: The AMDGPU targets. Default is + `gfx803,gfx900`. 
+""" + +_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" +_ROCM_TOOLKIT_PATH = "ROCM_TOOLKIT_PATH" +_TF_ROCM_VERSION = "TF_ROCM_VERSION" +_TF_MIOPEN_VERSION = "TF_MIOPEN_VERSION" +_TF_ROCM_AMDGPU_TARGETS = "TF_ROCM_AMDGPU_TARGETS" +_TF_ROCM_CONFIG_REPO = "TF_ROCM_CONFIG_REPO" + +_DEFAULT_ROCM_VERSION = "" +_DEFAULT_MIOPEN_VERSION = "" +_DEFAULT_ROCM_TOOLKIT_PATH = "/opt/rocm" +_DEFAULT_ROCM_AMDGPU_TARGETS = ["gfx803", "gfx900"] + +def find_cc(repository_ctx): + """Find the C++ compiler.""" + # Return a dummy value for GCC detection here to avoid error + target_cc_name = "gcc" + cc_path_envvar = _GCC_HOST_COMPILER_PATH + cc_name = target_cc_name + + if cc_path_envvar in repository_ctx.os.environ: + cc_name_from_env = repository_ctx.os.environ[cc_path_envvar].strip() + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Absolute path, maybe we should make this supported by our which function. + return cc_name + cc = repository_ctx.which(cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(target_cc_name, cc_path_envvar)) + return cc + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + return path + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + # TODO: We pass -no-canonical-prefixes here to match the compiler flags, + # but in rocm_clang CROSSTOOL file that is a `feature` and we should + # handle the case when it's disabled and no flag is passed + result = repository_ctx.execute([cc, "-no-canonical-prefixes", + "-E", "-x" + lang, "-", "-v"]) + index1 = result.stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = result.stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = 
result.stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = result.stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = result.stderr[index1 + 1:] + else: + inc_dirs = result.stderr[index1 + 1:index2].strip() + + return [str(repository_ctx.path(_cxx_inc_convert(p))) + for p in inc_dirs.split("\n")] + +def get_cxx_inc_directories(repository_ctx, cc): + """Compute the list of default C and C++ include directories.""" + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. (Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl(repository_ctx, cc, True) + includes_c = _get_cxx_inc_directories_impl(repository_ctx, cc, False) + + includes_cpp_set = depset(includes_cpp) + return includes_cpp + [inc for inc in includes_c + if inc not in includes_cpp_set] + +def auto_configure_fail(msg): + """Output failure message when rocm configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sROCm Configuration Error:%s %s\n" % (red, no_color, msg)) +# END cc_configure common functions (see TODO above). + +def _host_compiler_includes(repository_ctx, cc): + """Generates the cxx_builtin_include_directory entries for gcc inc dirs. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A string containing the cxx_builtin_include_directory for each of the gcc + host compiler include directories, which can be added to the CROSSTOOL + file. 
+ """ + inc_dirs = get_cxx_inc_directories(repository_ctx, cc) + + # Add numpy headers + inc_dirs.append("/usr/lib/python2.7/dist-packages/numpy/core/include") + + entries = [] + for inc_dir in inc_dirs: + entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + + # define TENSORFLOW_USE_ROCM + entries.append(" unfiltered_cxx_flag: \"-DTENSORFLOW_USE_ROCM\"") + + return "\n".join(entries) + +def _rocm_include_path(repository_ctx, rocm_config): + """Generates the cxx_builtin_include_directory entries for rocm inc dirs. + + Args: + repository_ctx: The repository context. + cc: The path to the gcc host compiler. + + Returns: + A string containing the cxx_builtin_include_directory for each of the gcc + host compiler include directories, which can be added to the CROSSTOOL + file. + """ + inc_dirs = [] + + # general ROCm include path + inc_dirs.append(rocm_config.rocm_toolkit_path + '/include') + + # Add HSA headers + inc_dirs.append("/opt/rocm/hsa/include") + + # Add HIP headers + inc_dirs.append("/opt/rocm/include/hip") + inc_dirs.append("/opt/rocm/include/hip/hcc_detail") + + # Add rocrand and hiprand headers + inc_dirs.append("/opt/rocm/rocrand/include") + inc_dirs.append("/opt/rocm/hiprand/include") + + # Add rocfft headers + inc_dirs.append("/opt/rocm/rocfft/include") + + # Add rocBLAS headers + inc_dirs.append("/opt/rocm/rocblas/include") + + # Add MIOpen headers + inc_dirs.append("/opt/rocm/miopen/include") + + # Add hcc headers + inc_dirs.append("/opt/rocm/hcc/include") + inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/7.0.0/include/") + inc_dirs.append("/opt/rocm/hcc/lib/clang/7.0.0/include") + # Newer hcc builds use/are based off of clang 8.0.0. 
+ inc_dirs.append("/opt/rocm/hcc/compiler/lib/clang/8.0.0/include/") + inc_dirs.append("/opt/rocm/hcc/lib/clang/8.0.0/include") + + inc_entries = [] + for inc_dir in inc_dirs: + inc_entries.append(" cxx_builtin_include_directory: \"%s\"" % inc_dir) + return "\n".join(inc_entries) + +def _enable_rocm(repository_ctx): + if "TF_NEED_ROCM" in repository_ctx.os.environ: + enable_rocm = repository_ctx.os.environ["TF_NEED_ROCM"].strip() + return enable_rocm == "1" + return False + +def _rocm_toolkit_path(repository_ctx): + """Finds the rocm toolkit directory. + + Args: + repository_ctx: The repository context. + + Returns: + A speculative real path of the rocm toolkit install directory. + """ + rocm_toolkit_path = _DEFAULT_ROCM_TOOLKIT_PATH + if _ROCM_TOOLKIT_PATH in repository_ctx.os.environ: + rocm_toolkit_path = repository_ctx.os.environ[_ROCM_TOOLKIT_PATH].strip() + if not repository_ctx.path(rocm_toolkit_path).exists: + auto_configure_fail("Cannot find rocm toolkit path.") + return str(repository_ctx.path(rocm_toolkit_path).realpath) + +def _amdgpu_targets(repository_ctx): + """Returns a list of strings representing AMDGPU targets.""" + if _TF_ROCM_AMDGPU_TARGETS not in repository_ctx.os.environ: + return _DEFAULT_ROCM_AMDGPU_TARGETS + amdgpu_targets_str = repository_ctx.os.environ[_TF_ROCM_AMDGPU_TARGETS] + amdgpu_targets = amdgpu_targets_str.split(",") + for amdgpu_target in amdgpu_targets: + if amdgpu_target[:3] != "gfx" or not amdgpu_target[3:].isdigit(): + auto_configure_fail("Invalid AMDGPU target: %s" % amdgpu_target) + return amdgpu_targets + +def _cpu_value(repository_ctx): + """Returns the name of the host operating system. + + Args: + repository_ctx: The repository context. + + Returns: + A string containing the name of the host operating system. 
+ """ + os_name = repository_ctx.os.name.lower() + if os_name.startswith("mac os"): + return "Darwin" + if os_name.find("windows") != -1: + return "Windows" + result = repository_ctx.execute(["uname", "-s"]) + return result.stdout.strip() + +def _lib_name(lib, cpu_value, version="", static=False): + """Constructs the platform-specific name of a library. + + Args: + lib: The name of the library, such as "hip" + cpu_value: The name of the host operating system. + version: The version of the library. + static: True the library is static or False if it is a shared object. + + Returns: + The platform-specific name of the library. + """ + if cpu_value in ("Linux"): + if static: + return "lib%s.a" % lib + else: + if version: + version = ".%s" % version + return "lib%s.so%s" % (lib, version) + elif cpu_value == "Windows": + return "%s.lib" % lib + elif cpu_value == "Darwin": + if static: + return "lib%s.a" % lib + elif version: + version = ".%s" % version + return "lib%s%s.dylib" % (lib, version) + else: + auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + +def _find_rocm_lib(lib, repository_ctx, cpu_value, basedir, version="", + static=False): + """Finds the given ROCm libraries on the system. + + Args: + lib: The name of the library, such as "hip" + repository_ctx: The repository context. + cpu_value: The name of the host operating system. + basedir: The install directory of ROCm. + version: The version of the library. + static: True if static library, False if shared object. + + Returns: + Returns a struct with the following fields: + file_name: The basename of the library found on the system. + path: The full path to the library. 
+ """ + file_name = _lib_name(lib, cpu_value, version, static) + if cpu_value == "Linux": + path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path( + "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + path = repository_ctx.path("%s/lib/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + path = repository_ctx.path("%s/%s" % (basedir, file_name)) + if path.exists: + return struct(file_name=file_name, path=str(path.realpath)) + + auto_configure_fail("Cannot find rocm library %s" % file_name) + +def _find_libs(repository_ctx, rocm_config): + """Returns the ROCm libraries on the system. + + Args: + repository_ctx: The repository context. + rocm_config: The ROCm config as returned by _get_rocm_config + + Returns: + Map of library names to structs of filename and path as returned by + _find_rocm_lib. + """ + cpu_value = rocm_config.cpu_value + return { + "hip": _find_rocm_lib( + "hip_hcc", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path), + "rocblas": _find_rocm_lib( + "rocblas", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocblas"), + "rocfft": _find_rocm_lib( + "rocfft", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/rocfft"), + "hiprand": _find_rocm_lib( + "hiprand", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/hiprand"), + "miopen": _find_rocm_lib( + "MIOpen", repository_ctx, cpu_value, rocm_config.rocm_toolkit_path + "/miopen"), + } + +def _get_rocm_config(repository_ctx): + """Detects and returns information about the ROCm installation on the system. + + Args: + repository_ctx: The repository context. 
+ + Returns: + A struct containing the following fields: + rocm_toolkit_path: The ROCm toolkit installation directory. + amdgpu_targets: A list of the system's AMDGPU targets. + cpu_value: The name of the host operating system. + """ + cpu_value = _cpu_value(repository_ctx) + rocm_toolkit_path = _rocm_toolkit_path(repository_ctx) + return struct( + rocm_toolkit_path = rocm_toolkit_path, + amdgpu_targets = _amdgpu_targets(repository_ctx), + cpu_value = cpu_value) + +def _tpl(repository_ctx, tpl, substitutions={}, out=None): + if not out: + out = tpl.replace(":", "/") + repository_ctx.template( + out, + Label("//third_party/gpus/%s.tpl" % tpl), + substitutions) + + +def _file(repository_ctx, label): + repository_ctx.template( + label.replace(":", "/"), + Label("//third_party/gpus/%s.tpl" % label), + {}) + + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=rocm but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' && exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _create_dummy_repository(repository_ctx): + cpu_value = _cpu_value(repository_ctx) + + # Set up BUILD file for rocm/. 
+ _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "False", + "%{rocm_extra_copts}": "[]" + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": _lib_name("hip", cpu_value), + "%{rocblas_lib}": _lib_name("rocblas", cpu_value), + "%{miopen_lib}": _lib_name("miopen", cpu_value), + "%{rocfft_lib}": _lib_name("rocfft", cpu_value), + "%{hiprand_lib}": _lib_name("hiprand", cpu_value), + "%{rocm_include_genrules}": '', + "%{rocm_headers}": '', + }) + + # Create dummy files for the ROCm toolkit since they are still required by + # tensorflow/core/platform/default/build_config:rocm. + repository_ctx.file("rocm/hip/include/hip/hip_runtime.h", "") + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_toolkit_path}": _DEFAULT_ROCM_TOOLKIT_PATH, + }, "rocm/rocm/rocm_config.h") + + # If rocm_configure is not configured to build with GPU support, and the user + # attempts to build with --config=rocm, add a dummy build rule to intercept + # this and fail with an actionable error message. + repository_ctx.file("crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) + +def _execute(repository_ctx, cmdline, error_msg=None, error_details=None, + empty_stdout_fine=False): + """Executes an arbitrary shell command. 
+ + Args: + repository_ctx: the repository_ctx object + cmdline: list of strings, the command to execute + error_msg: string, a summary of the error if the command fails + error_details: string, details about the error or steps to fix it + empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise + it's an error + Return: + the result of repository_ctx.execute(cmdline) + """ + result = repository_ctx.execute(cmdline) + if result.stderr or not (empty_stdout_fine or result.stdout): + auto_configure_fail( + "\n".join([ + error_msg.strip() if error_msg else "Repository command failed", + result.stderr.strip(), + error_details if error_details else ""])) + return result + +def _norm_path(path): + """Returns a path with '/' and remove the trailing slash.""" + path = path.replace("\\", "/") + if path[-1] == "/": + path = path[:-1] + return path + +def _symlink_genrule_for_dir(repository_ctx, src_dir, dest_dir, genrule_name, + src_files = [], dest_files = []): + """Returns a genrule to symlink(or copy if on Windows) a set of files. + + If src_dir is passed, files will be read from the given directory; otherwise + we assume files are in src_files and dest_files + """ + if src_dir != None: + src_dir = _norm_path(src_dir) + dest_dir = _norm_path(dest_dir) + files = _read_dir(repository_ctx, src_dir) + # Create a list with the src_dir stripped to use for outputs. + dest_files = files.replace(src_dir, '').splitlines() + src_files = files.splitlines() + command = [] + # We clear folders that might have been generated previously to avoid + # undesired inclusions + command.append('if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi') + command.append('if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi') + outs = [] + for i in range(len(dest_files)): + if dest_files[i] != "": + # If we have only one file to link we do not want to use the dest_dir, as + # $(@D) will include the full path to the file. 
+ dest = '$(@D)/' + dest_dir + dest_files[i] if len(dest_files) != 1 else '$(@D)/' + dest_files[i] + # On Windows, symlink is not supported, so we just copy all the files. + cmd = 'ln -s' + command.append(cmd + ' "%s" "%s"' % (src_files[i] , dest)) + outs.append(' "' + dest_dir + dest_files[i] + '",') + genrule = _genrule(src_dir, genrule_name, " && ".join(command), + "\n".join(outs)) + return genrule + +def _genrule(src_dir, genrule_name, command, outs): + """Returns a string with a genrule. + + Genrule executes the given command and produces the given outputs. + """ + return ( + 'genrule(\n' + + ' name = "' + + genrule_name + '",\n' + + ' outs = [\n' + + outs + + '\n ],\n' + + ' cmd = """\n' + + command + + '\n """,\n' + + ')\n' + ) + +def _read_dir(repository_ctx, src_dir): + """Returns a string with all files in a directory. + + Finds all files inside a directory, traversing subfolders and following + symlinks. The returned string contains the full path of all files + separated by line breaks. + """ + find_result = _execute( + repository_ctx, ["find", src_dir, "-follow", "-type", "f"], + empty_stdout_fine=True) + result = find_result.stdout + return result + +def _compute_rocm_extra_copts(repository_ctx, amdgpu_targets): + if False: + amdgpu_target_flags = ["--amdgpu-target=" + + amdgpu_target for amdgpu_target in amdgpu_targets] + else: + # AMDGPU targets are handled in the "crosstool_wrapper_driver_is_not_gcc" + amdgpu_target_flags = [] + return str(amdgpu_target_flags) + +def _create_local_rocm_repository(repository_ctx): + """Creates the repository containing files set up to build with ROCm.""" + rocm_config = _get_rocm_config(repository_ctx) + + # Set up symbolic links for the rocm toolkit by creating genrules to do + # symlinking. 
We create one genrule for each directory we want to track under + # rocm_toolkit_path + rocm_toolkit_path = rocm_config.rocm_toolkit_path + rocm_include_path = rocm_toolkit_path + "/include" + genrules = [_symlink_genrule_for_dir(repository_ctx, + rocm_include_path, "rocm/include", "rocm-include")] + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocfft/include", "rocm/include/rocfft", "rocfft-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/rocblas/include", "rocm/include/rocblas", "rocblas-include")) + genrules.append(_symlink_genrule_for_dir(repository_ctx, + rocm_toolkit_path + "/miopen/include", "rocm/include/miopen", "miopen-include")) + + rocm_libs = _find_libs(repository_ctx, rocm_config) + rocm_lib_src = [] + rocm_lib_dest = [] + for lib in rocm_libs.values(): + rocm_lib_src.append(lib.path) + rocm_lib_dest.append("rocm/lib/" + lib.file_name) + genrules.append(_symlink_genrule_for_dir(repository_ctx, None, "", "rocm-lib", + rocm_lib_src, rocm_lib_dest)) + + included_files = _read_dir(repository_ctx, rocm_include_path).replace( + rocm_include_path, '').splitlines() + + # Set up BUILD file for rocm/ + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, rocm_config.amdgpu_targets), + + }) + _tpl(repository_ctx, "rocm:BUILD", + { + "%{hip_lib}": rocm_libs["hip"].file_name, + "%{rocblas_lib}": rocm_libs["rocblas"].file_name, + "%{rocfft_lib}": rocm_libs["rocfft"].file_name, + "%{hiprand_lib}": rocm_libs["hiprand"].file_name, + "%{miopen_lib}": rocm_libs["miopen"].file_name, + "%{rocm_include_genrules}": "\n".join(genrules), + "%{rocm_headers}": ('":rocm-include",\n' + + '":rocfft-include",\n' + + '":rocblas-include",\n' + + '":miopen-include",'), + }) + # Set up crosstool/ + _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty", "%{win_linker_files}": ":empty"}) + cc = 
find_cc(repository_ctx) + host_compiler_includes = _host_compiler_includes(repository_ctx, cc) + rocm_defines = { + "%{rocm_include_path}": _rocm_include_path(repository_ctx, + rocm_config), + "%{host_compiler_includes}": host_compiler_includes, + "%{clang_path}": str(cc), + } + + _tpl(repository_ctx, "crosstool:CROSSTOOL_hipcc", rocm_defines, out="crosstool/CROSSTOOL") + + _tpl(repository_ctx, + "crosstool:clang/bin/crosstool_wrapper_driver_rocm", + { + "%{cpu_compiler}": str(cc), + "%{hipcc_path}": "/opt/rocm/bin/hipcc", + "%{gcc_host_compiler_path}": str(cc), + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + }) + + # Set up rocm_config.h, which is used by + # tensorflow/stream_executor/dso_loader.cc. + _tpl(repository_ctx, "rocm:rocm_config.h", + { + "%{rocm_amdgpu_targets}": ",".join( + ["\"%s\"" % c for c in rocm_config.amdgpu_targets]), + "%{rocm_toolkit_path}": rocm_config.rocm_toolkit_path, + }, "rocm/rocm/rocm_config.h") + + +def _create_remote_rocm_repository(repository_ctx, remote_config_repo): + """Creates pointers to a remotely configured repo set up to build with ROCm.""" + _tpl(repository_ctx, "rocm:build_defs.bzl", + { + "%{rocm_is_configured}": "True", + "%{rocm_extra_copts}": _compute_rocm_extra_copts( + repository_ctx, #_compute_capabilities(repository_ctx) + ), + + }) + _tpl(repository_ctx, "rocm:remote.BUILD", + { + "%{remote_rocm_repo}": remote_config_repo, + }, "rocm/BUILD") + _tpl(repository_ctx, "crosstool:remote.BUILD", { + "%{remote_rocm_repo}": remote_config_repo, + }, "crosstool/BUILD") + +def _rocm_autoconf_impl(repository_ctx): + """Implementation of the rocm_autoconf repository rule.""" + if not _enable_rocm(repository_ctx): + _create_dummy_repository(repository_ctx) + else: + if _TF_ROCM_CONFIG_REPO in repository_ctx.os.environ: + _create_remote_rocm_repository(repository_ctx, + repository_ctx.os.environ[_TF_ROCM_CONFIG_REPO]) + else: + _create_local_rocm_repository(repository_ctx) + + 
+rocm_configure = repository_rule( + implementation = _rocm_autoconf_impl, + environ = [ + _GCC_HOST_COMPILER_PATH, + "TF_NEED_ROCM", + _ROCM_TOOLKIT_PATH, + _TF_ROCM_VERSION, + _TF_MIOPEN_VERSION, + _TF_ROCM_AMDGPU_TARGETS, + _TF_ROCM_CONFIG_REPO, + ], +) + +"""Detects and configures the local ROCm toolchain. + +Add the following to your WORKSPACE FILE: + +```python +rocm_configure(name = "local_config_rocm") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/tools/bazel.rc b/tools/bazel.rc index 601e07ffdd..afc5cf56ab 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -42,6 +42,9 @@ build:download_clang_use_lld --linkopt='-fuse-ld=lld' build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --define=using_cuda=true --define=using_cuda_nvcc=true +build:rocm --crosstool_top=@local_config_rocm//crosstool:toolchain +build:rocm --define=using_rocm=true --define=using_rocm_hipcc=true + build:cuda_clang --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda_clang --define=using_cuda=true --define=using_cuda_clang=true --define=using_clang=true -- GitLab From d0574f6b25ab01052e093ab92612520a7e4ada8d Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Thu, 6 Sep 2018 08:22:37 -0700 Subject: [PATCH 019/570] Fixed clang formatting --- .../stream_executor/cuda/cuda_gpu_executor.cc | 17 +++++++++-------- .../stream_executor/cuda/cuda_gpu_executor.h | 12 ++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index ce2f1ce3ae..ef84d01a94 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -493,10 +493,10 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // Compute and return maximum blocks per core (occupancy) based on the // device description, some kernel characteristics and the number of threads 
per // block. If unable to compute occupancy, zero is returned. -int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { +int CUDAExecutor::CalculateOccupancy( + const DeviceDescription& device_description, uint64 registers_per_thread, + uint64 shared_memory_per_block, const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = @@ -509,10 +509,11 @@ int CUDAExecutor::CalculateOccupancy(const DeviceDescription& device_description // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. int CUDAExecutor::CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func) { + const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, + CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; CUresult err = diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index e8ebbc3220..1481dcc19a 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -71,16 +71,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const KernelArgsArrayBase &args) override; int CalculateOccupancy(const DeviceDescription& device_description, + uint64 registers_per_thread, + uint64 shared_memory_per_block, + const ThreadDim& thread_dims, CUfunction func); + + int CompareOccupancy(int* initial_blocks, + const DeviceDescription& device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, const ThreadDim& 
thread_dims, CUfunction func); - int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, - uint64 registers_per_thread, - uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); - void *Allocate(uint64 size) override; void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, -- GitLab From e3654a3cb4e26c26409aeeb9e127e3addcb14cee Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:20:11 +0000 Subject: [PATCH 020/570] Add float16 support on GPU for tf.contrib.image.transform This fix tries to address the issue raised in 22115 where there were no float16 support on GPU for tf.contrib.image.transform. This fix fixes 22115. Signed-off-by: Yong Tang --- tensorflow/contrib/image/kernels/image_ops.cc | 2 ++ tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index 370a8caf6a..788bf04b28 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -156,6 +156,7 @@ namespace functor { TF_CALL_uint8(DECLARE_FUNCTOR); TF_CALL_int32(DECLARE_FUNCTOR); TF_CALL_int64(DECLARE_FUNCTOR); +TF_CALL_half(DECLARE_FUNCTOR); TF_CALL_float(DECLARE_FUNCTOR); TF_CALL_double(DECLARE_FUNCTOR); @@ -175,6 +176,7 @@ TF_CALL_double(DECLARE_FUNCTOR); TF_CALL_uint8(REGISTER); TF_CALL_int32(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc index 8743a5ff72..36b9a236a6 100644 --- a/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc +++ b/tensorflow/contrib/image/kernels/image_ops_gpu.cu.cc @@ -32,6 +32,7 @@ typedef Eigen::GpuDevice GPUDevice; template class FillProjectiveTransform; template class FillProjectiveTransform; template class 
FillProjectiveTransform; +template class FillProjectiveTransform; template class FillProjectiveTransform; template class FillProjectiveTransform; -- GitLab From 7d7e8a725aeede4b724f7376d22df2c7f2ebdcf9 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:22:39 +0000 Subject: [PATCH 021/570] Add test case for float16 support on GPU for tf.contrib.image.transform Signed-off-by: Yong Tang --- .../contrib/image/python/kernel_tests/image_ops_test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 376c0751ee..ef1f79bb94 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -272,6 +272,13 @@ class ImageOpsTest(test_util.TensorFlowTestCase): with self.cached_session(): self.assertAllEqual([[[[1], [0]], [[0], [1]]]], result.eval()) + def test_transform_data_types(self): + for dtype in _DTYPES: + image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) + value = image_ops.transform(image, [1] * 8) + with self.test_session(use_gpu=True): + self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From 04e20965487c36f43ba5c773b547b23e39478a5c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Thu, 6 Sep 2018 19:25:22 +0000 Subject: [PATCH 022/570] Pylint fix Signed-off-by: Yong Tang --- .../contrib/image/python/kernel_tests/image_ops_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index ef1f79bb94..4997c31a7f 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -277,7 +277,9 @@ class 
ImageOpsTest(test_util.TensorFlowTestCase): image = constant_op.constant([[1, 2], [3, 4]], dtype=dtype) value = image_ops.transform(image, [1] * 8) with self.test_session(use_gpu=True): - self.assertAllEqual(value.eval(), np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) + self.assertAllEqual( + value.eval(), + np.array([[4, 4], [4, 4]]).astype(dtype.as_numpy_dtype())) class BipartiteMatchTest(test_util.TensorFlowTestCase): -- GitLab From 6a5090b086bc9d665eb9e65f05eb94cdb58baaa2 Mon Sep 17 00:00:00 2001 From: Matt Conley Date: Thu, 6 Sep 2018 13:09:12 -0700 Subject: [PATCH 023/570] Fully fixed clang errors --- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 12 ++++++------ tensorflow/stream_executor/cuda/cuda_gpu_executor.h | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index ef84d01a94..9d5bcc7f77 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -472,7 +472,7 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, const DeviceDescription &device_description = kernel.parent()->GetDeviceDescription(); - const CUDAKernel* cuda_kernel = AsCUDAKernel(&kernel); + const CUDAKernel *cuda_kernel = AsCUDAKernel(&kernel); CUfunction cufunc = cuda_kernel->AsCUDAFunctionValue(); int blocks_per_sm = CalculateOccupancy(device_description, regs_per_thread, @@ -494,8 +494,8 @@ void CUDAExecutor::VlogOccupancyInfo(const KernelBase &kernel, // device description, some kernel characteristics and the number of threads per // block. If unable to compute occupancy, zero is returned. 
int CUDAExecutor::CalculateOccupancy( - const DeviceDescription& device_description, uint64 registers_per_thread, - uint64 shared_memory_per_block, const ThreadDim& thread_dims, + const DeviceDescription &device_description, uint64 registers_per_thread, + uint64 shared_memory_per_block, const ThreadDim &thread_dims, CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; @@ -508,11 +508,11 @@ int CUDAExecutor::CalculateOccupancy( // Compute and return the suggested thread count to acheive ideal occupancy. // If the provided thread dimensions match this number, zero is returned. -int CUDAExecutor::CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, +int CUDAExecutor::CompareOccupancy(int *initial_blocks, + const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - const ThreadDim& thread_dims, + const ThreadDim &thread_dims, CUfunction func) { int suggested_blocks = 0; int suggested_threads = 0; diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h index 1481dcc19a..53b2a29ae7 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.h +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.h @@ -70,16 +70,16 @@ class CUDAExecutor : public internal::StreamExecutorInterface { const BlockDim &block_dims, const KernelBase &k, const KernelArgsArrayBase &args) override; - int CalculateOccupancy(const DeviceDescription& device_description, + int CalculateOccupancy(const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - const ThreadDim& thread_dims, CUfunction func); + const ThreadDim &thread_dims, CUfunction func); - int CompareOccupancy(int* initial_blocks, - const DeviceDescription& device_description, + int CompareOccupancy(int *initial_blocks, + const DeviceDescription &device_description, uint64 registers_per_thread, uint64 shared_memory_per_block, - 
const ThreadDim& thread_dims, CUfunction func); + const ThreadDim &thread_dims, CUfunction func); void *Allocate(uint64 size) override; -- GitLab From e25cf78285fef5234380ee26fef9090a939e91f5 Mon Sep 17 00:00:00 2001 From: Richard Yu Date: Thu, 6 Sep 2018 17:05:08 -0700 Subject: [PATCH 024/570] Ensure all ValueErrors are raised --- tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +- tensorflow/python/keras/layers/embeddings.py | 8 ++++---- tensorflow/python/ops/nn_ops.py | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index d9f179bee4..d882b79892 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling): bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var, context) if batch_mean_tensor is None and moving_mean_tensor is None: - ValueError('Error folding unfused batch norms') + raise ValueError('Error folding unfused batch norms') if has_scaling: gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context) diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py index 629a9ec9a1..a0b9393812 100644 --- a/tensorflow/python/keras/layers/embeddings.py +++ b/tensorflow/python/keras/layers/embeddings.py @@ -142,13 +142,13 @@ class Embedding(Layer): else: in_lens = [self.input_length] if len(in_lens) != len(input_shape) - 1: - ValueError('"input_length" is %s, but received input has shape %s' % - (str(self.input_length), str(input_shape))) + raise ValueError('"input_length" is %s, but received input has shape %s' % + (str(self.input_length), str(input_shape))) else: for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): if s1 is not None and s2 is not None and s1 != s2: - ValueError('"input_length" is %s, 
but received input has shape %s' % - (str(self.input_length), str(input_shape))) + raise ValueError('"input_length" is %s, but received input has shape %s' % + (str(self.input_length), str(input_shape))) elif s1 is None: in_lens[i] = s2 return (input_shape[0],) + tuple(in_lens) + (self.output_dim,) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index ef9afd9e8e..17e10995f2 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -427,8 +427,8 @@ class _WithSpaceToBatch(object): try: input_shape.with_rank_at_least(expected_input_rank) except ValueError: - ValueError("input tensor must have rank %d at least" % - (expected_input_rank)) + raise ValueError("input tensor must have rank %d at least" % + (expected_input_rank)) const_rate = tensor_util.constant_value(dilation_rate) rate_or_const_rate = dilation_rate @@ -818,12 +818,12 @@ class Convolution(object): try: input_shape.with_rank(num_spatial_dims + 2) except ValueError: - ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) try: filter_shape.with_rank(num_spatial_dims + 2) except ValueError: - ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) if data_format is None or not data_format.startswith("NC"): input_channels_dim = input_shape[num_spatial_dims + 1] -- GitLab From 864e290d1776895d7877777b8368ca8bc6fc22a3 Mon Sep 17 00:00:00 2001 From: Edvard Fagerholm Date: Wed, 29 Aug 2018 11:56:35 +0300 Subject: [PATCH 025/570] Make tf.transpose emit simpler graph when possible If not given an explicit 'perm' parameter, tf.transpose currently emits a graph that dynamically calculates it from the rank of the input tensor. This is completely unnecessary when the rank of the input can be statically determined at graph construction time. 
Modify tf.transpose to emit 'perm' as a single Const node whenever possible. --- tensorflow/python/ops/array_ops.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 7bf3869ddf..9597839301 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1409,8 +1409,13 @@ def transpose(a, perm=None, name="transpose", conjugate=False): gen_array_ops.conjugate_transpose if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose) if perm is None: - rank = gen_array_ops.rank(a) - perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + a = ops.convert_to_tensor(a, name="a") + if not a.get_shape().ndims: + rank = gen_array_ops.rank(a) + perm = (rank - 1) - gen_math_ops._range(0, rank, 1) + else: + rank = a.get_shape().ndims + perm = (rank - 1) - np.arange(rank) ret = transpose_fn(a, perm, name=name) # NOTE(mrry): Setting the shape explicitly because # reverse is not handled by the shape function. -- GitLab From 90cf7fb7786c8a9c135ef73482856b082e80f61a Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Tue, 11 Sep 2018 12:48:30 +0800 Subject: [PATCH 026/570] Fix lint errors and typos. 
--- tensorflow/compiler/tests/binary_ops_test.py | 9 +++++---- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 14 +++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 8941dd4e27..069e83d083 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -179,11 +179,12 @@ class BinaryOpsTest(xla_test.XLATestCase): expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._leaky_relu_grad, + gen_nn_ops.leaky_relu_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array( - [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], dtype=dtype)) + np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], + dtype=dtype), + expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], + dtype=dtype)) self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index ec14735884..8d65e0339c 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,7 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; - class LeakyReluOp : public XlaOpKernel { public: explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { @@ -61,9 +60,9 @@ class LeakyReluOp : public XlaOpKernel { xla::XlaBuilder* builder = ctx->builder(); auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), static_cast(alpha_)); - ctx->SetOutput(0, - xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); + ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); } + private: float alpha_; }; @@ -115,11 +114,12 @@ class LeakyReluGradOp : public XlaOpKernel { const auto zero = xla::Broadcast(XlaHelpers::Zero(b, 
input_type(0)), shape.dim_sizes()); const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = XlaHelpers::FloatLiteral(b, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, - xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); + auto alpha = + XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); + ctx->SetOutput( + 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); } + private: float alpha_; }; -- GitLab From 8530167f68673fa756565c0394bbe2dcdc39db05 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Fri, 24 Aug 2018 16:52:07 +0300 Subject: [PATCH 027/570] Add IgniteDataset that allows to work with Apache Ignite. --- configure.py | 2 + tensorflow/BUILD | 6 + tensorflow/contrib/BUILD | 15 + tensorflow/contrib/cmake/python_modules.txt | 2 + tensorflow/contrib/ignite/BUILD | 136 ++++ tensorflow/contrib/ignite/README.md | 167 ++++ tensorflow/contrib/ignite/__init__.py | 42 + .../kernels/ignite_binary_object_parser.cc | 304 +++++++ .../kernels/ignite_binary_object_parser.h | 54 ++ .../contrib/ignite/kernels/ignite_client.cc | 55 ++ .../contrib/ignite/kernels/ignite_client.h | 40 + .../contrib/ignite/kernels/ignite_dataset.cc | 123 +++ .../contrib/ignite/kernels/ignite_dataset.h | 65 ++ .../ignite/kernels/ignite_dataset_iterator.cc | 447 ++++++++++ .../ignite/kernels/ignite_dataset_iterator.h | 87 ++ .../ignite/kernels/ignite_dataset_ops.cc | 145 ++++ .../ignite/kernels/ignite_plain_client.h | 43 + .../kernels/ignite_plain_client_unix.cc | 132 +++ .../kernels/ignite_plain_client_windows.cc | 143 ++++ .../ignite/kernels/ignite_ssl_wrapper.cc | 149 ++++ .../ignite/kernels/ignite_ssl_wrapper.h | 49 ++ tensorflow/contrib/ignite/ops/dataset_ops.cc | 64 ++ .../ignite/python/ops/ignite_dataset_ops.py | 763 ++++++++++++++++++ .../ignite/python/ops/ignite_op_loader.py | 25 + .../ignite/python/tests/bin/start-plain.sh | 24 + .../ignite/python/tests/bin/start-ssl-auth.sh | 28 + .../ignite/python/tests/bin/start-ssl.sh | 26 
+ .../tests/config/ignite-config-plain.xml | 39 + .../tests/config/ignite-config-ssl-auth.xml | 59 ++ .../python/tests/config/ignite-config-ssl.xml | 59 ++ .../python/tests/ignite_dataset_test.py | 77 ++ .../ignite/python/tests/keystore/client.jks | Bin 0 -> 3232 bytes .../ignite/python/tests/keystore/client.pem | 69 ++ .../ignite/python/tests/keystore/server.jks | Bin 0 -> 3230 bytes .../ignite/python/tests/keystore/trust.jks | Bin 0 -> 2432 bytes .../contrib/ignite/python/tests/sql/init.sql | 20 + .../ignite/python/tests/start_ignite.sh | 30 + .../ignite/python/tests/stop_ignite.sh | 19 + 38 files changed, 3508 insertions(+) create mode 100644 tensorflow/contrib/ignite/BUILD create mode 100644 tensorflow/contrib/ignite/README.md create mode 100644 tensorflow/contrib/ignite/__init__.py create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client.h create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc create mode 100644 tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h create mode 100644 tensorflow/contrib/ignite/ops/dataset_ops.cc create mode 100644 
tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py create mode 100644 tensorflow/contrib/ignite/python/ops/ignite_op_loader.py create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-plain.sh create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh create mode 100755 tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml create mode 100644 tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml create mode 100644 tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.jks create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/client.pem create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/server.jks create mode 100644 tensorflow/contrib/ignite/python/tests/keystore/trust.jks create mode 100644 tensorflow/contrib/ignite/python/tests/sql/init.sql create mode 100755 tensorflow/contrib/ignite/python/tests/start_ignite.sh create mode 100755 tensorflow/contrib/ignite/python/tests/stop_ignite.sh diff --git a/configure.py b/configure.py index 361bd4764d..8f1957e870 100644 --- a/configure.py +++ b/configure.py @@ -1502,6 +1502,8 @@ def main(): 'with_aws_support', True, 'aws') set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', 'with_kafka_support', True, 'kafka') + set_build_var(environ_cp, 'TF_NEED_IGNITE', 'Apache Ignite', + 'with_ignite_support', True, 'ignite') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') set_build_var(environ_cp, 'TF_NEED_GDR', 'GDR', 'with_gdr_support', diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 386e0096ff..6c29c78793 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -248,6 +248,12 @@ config_setting( visibility = ["//visibility:public"], ) 
+config_setting( + name = "with_ignite_support", + define_values = {"with_ignite_support": "true"}, + visibility = ["//visibility:public"], +) + # Crosses between platforms and file system libraries not supported on those # platforms due to limitations in nested select() statements. config_setting( diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 798f499870..f055e643d0 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -118,6 +118,11 @@ py_library( "//tensorflow/contrib/kafka", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ @@ -160,6 +165,11 @@ cc_library( "//tensorflow/contrib/kafka:dataset_kernels", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite:dataset_kernels", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ @@ -197,6 +207,11 @@ cc_library( "//tensorflow/contrib/kafka:dataset_ops_op_lib", ], "//conditions:default": [], + }) + select({ + "//tensorflow:with_ignite_support": [ + "//tensorflow/contrib/ignite:dataset_ops_op_lib", + ], + "//conditions:default": [], }) + select({ "//tensorflow:with_aws_support_windows_override": [], "//tensorflow:with_aws_support": [ diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index fb871acae9..56755e817a 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -207,6 +207,8 @@ tensorflow/contrib/integrate/python tensorflow/contrib/integrate/python/ops tensorflow/contrib/kafka/python tensorflow/contrib/kafka/python/ops +tensorflow/contrib/ignite/python +tensorflow/contrib/ignite/python/ops tensorflow/contrib/keras 
tensorflow/contrib/keras/api tensorflow/contrib/keras/api/keras diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD new file mode 100644 index 0000000000..9f6c666893 --- /dev/null +++ b/tensorflow/contrib/ignite/BUILD @@ -0,0 +1,136 @@ +package(default_visibility = ["//tensorflow:internal"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_py_test", + "if_not_windows", + "if_windows", +) + +py_library( + name = "ignite", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", + srcs = [ + "kernels/ignite_dataset_ops.cc", + "kernels/ignite_client.h", + "kernels/ignite_client.cc", + "kernels/ignite_plain_client.h", + "kernels/ignite_ssl_wrapper.h", + "kernels/ignite_ssl_wrapper.cc", + "kernels/ignite_binary_object_parser.h", + "kernels/ignite_binary_object_parser.cc", + "kernels/ignite_dataset.h", + "kernels/ignite_dataset.cc", + "kernels/ignite_dataset_iterator.h", + "kernels/ignite_dataset_iterator.cc", + ] + if_not_windows([ + "kernels/ignite_plain_client_unix.cc", + ]) + if_windows([ + "kernels/ignite_plain_client_windows.cc", + ]), + deps = [ + "//tensorflow/core:framework_headers_lib", + "//third_party/eigen3", + "@boringssl//:ssl", + "@protobuf_archive//:protobuf_headers", + ], + alwayslink = 1, +) + +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/ignite_dataset_ops.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":ignite_op_loader", + "//tensorflow/python:dataset_ops_gen", + "//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + 
"//tensorflow/python/data/util:nest", + ], +) + +tf_gen_op_wrapper_py( + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/ignite:dataset_ops_op_lib"], +) + +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "ignite_op_loader", + srcs = ["python/ops/ignite_op_loader.py"], + dso = ["//tensorflow/contrib/ignite:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/ignite:dataset_ops_op_lib", + ], + srcs_version = "PY2AND3", + deps = [ + ":gen_dataset_ops", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + ], +) + +# The Apache Ignite servers have to setup before the test and tear down +# after the test manually. The docker engine has to be installed. +# +# To setup Apache Ignite servers: +# $ bash ./python/tests/start_ignite.sh +# +# To tear down Apache Ignite servers: +# $ bash ./python/tests/stop_ignite.sh +tf_py_test( + name = "ignite_dataset_test", + srcs = ["python/tests/ignite_dataset_test.py"], + additional_deps = [ + ":ignite", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], + tags = [ + "manual", + "no_windows", + "notap", + ], +) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md new file mode 100644 index 0000000000..9054344e94 --- /dev/null +++ b/tensorflow/contrib/ignite/README.md @@ -0,0 +1,167 @@ +### Ignite Dataset +# Ignite Dataset + +- [Overview](#overview) +- [Features](#features) + * [Distributed In-Memory Datasource](#distributed-in-memory-datasource) + * [Structured Objects](#structured-objects) + * [Distributed Training](#distributed-training) + * [SSL Connection](#ssl-connection) + * [Windows Support](#windows-support) +- [Try it out](#try-it-out) +- 
[Limitations](#limitations) + +## Overview + +[Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for +transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) on the TensorFlow side and the [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) on the Apache Ignite side. It allows using Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. + +## Features + +Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below. + +### Distributed In-Memory Datasource +[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that avoids the limitations of hard drives and provides high read speed and the ability to store and operate on as much data as you need in a distributed cluster. Using Ignite Dataset makes it possible to utilize all these advantages. +- If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations. +- If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face hard drive speed limitations again. At the same time, you can store your data in an Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations.
+- If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in an Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow. + +It's important that Apache Ignite is not just a step of an ETL pipeline between a database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you get everything you need to work with operational or historical data and, at the same time, the ability to use this data for neural network training and inference. + +```bash +$ apache-ignite-fabric/bin/ignite.sh +$ apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://localhost:10800/" + +jdbc:ignite:thin://localhost/> CREATE TABLE KITTEN_CACHE (ID LONG PRIMARY KEY, NAME VARCHAR); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (1, 'WARM KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (2, 'SOFT KITTY'); +jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL OF FUR'); +``` + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="SQL_PUBLIC_KITTEN_CACHE") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> for _ in range(3): +>>> print(sess.run(next_obj)) + +{'key': 1, 'val': {'NAME': b'WARM KITTY'}} +{'key': 2, 'val': {'NAME': b'SOFT KITTY'}} +{'key': 3, 'val': {'NAME': b'LITTLE BALL OF FUR'}} +``` + +### Structured Objects +[Apache Ignite](https://ignite.apache.org/) allows you to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides the ability to work with such objects.
+ +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES") +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +{ + 'key': 'kitten.png', + 'val': { + 'metadata': { + 'file_name': b'kitten.png', + 'label': b'little ball of fur', + width: 800, + height: 600 + }, + 'pixels': [0, 0, 0, 0, ..., 0] + } +} +``` + Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES").map(lambda obj: obj['val']['pixels']) +>>> iterator = dataset.make_one_shot_iterator() +>>> next_obj = iterator.get_next() +>>> +>>> with tf.Session() as sess: +>>> print(sess.run(next_obj)) + +[0, 0, 0, 0, ..., 0] +``` + +### Distributed Training + +TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. + + + +Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck. + +Apache Ignite uses horizontal partitioning to store data in distributed cluster. 
When we create an Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, an Apache Ignite cluster consists of 10 machines and we create a cache with 10 partitions then every machine will maintain approximately one data partition. + +Ignite Dataset allows you to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting the corresponding environment variables for the worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with a single dataset. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset("IMAGES") +>>> +>>> # Compute gradients locally on every worker node. +>>> gradients = [] +>>> for i in range(5): +>>> with tf.device("/job:WORKER/task:%d" % i): +>>> device_iterator = dataset.make_one_shot_iterator() +>>> device_next_obj = device_iterator.get_next() +>>> gradient = compute_gradient(device_next_obj) +>>> gradients.append(gradient) +>>> +>>> # Aggregate them on master node. +>>> result_gradient = tf.reduce_sum(gradients) +>>> +>>> with tf.Session("grpc://localhost:10000") as sess: +>>> print(sess.run(result_gradient)) +``` + +The high-level TensorFlow API for [distributed training](https://www.tensorflow.org/api_docs/python/tf/contrib/distribute/DistributionStrategy) is supported as well. + +### SSL Connection + +Your data should not be accessible without any control. 
Apache Ignite allows you to protect data transfer channels with [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentication. Ignite Dataset supports SSL connections both with and without authentication. For more information please see the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. + +```python +>>> import tensorflow as tf +>>> from tensorflow.contrib.ignite import IgniteDataset +>>> +>>> dataset = IgniteDataset(cache_name="IMAGES", certfile="client.pem", cert_password="password", username="ignite", password="ignite") +>>> ... +``` + +### Windows Support + +Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. + +## Try it out + +The simplest way to try Ignite Dataset out is to run a [Docker](https://www.docker.com/) container with Apache Ignite and preloaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interact with it using Ignite Dataset. Such a container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine: + +``` +docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist +``` + +After that you will be able to work with it in the following way: + +![ignite-dataset-mnist](https://s3.amazonaws.com/helloworld23423423ew23/ignite-dataset-mnist.png "Ignite Dataset Mnist") + +## Limitations + +Presently Ignite Dataset works with the assumption that all objects in the cache have the same structure (homogeneous objects) and that the cache contains at least one object. Another limitation concerns structured objects: Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. 
\ No newline at end of file diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py new file mode 100644 index 0000000000..468920a557 --- /dev/null +++ b/tensorflow/contrib/ignite/__init__.py @@ -0,0 +1,42 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Apache Ignite is a memory-centric distributed database, caching, and + processing platform for transactional, analytical, and streaming workloads, + delivering in-memory speeds at petabyte scale. This contrib package + contains an integration between Apache Ignite and TensorFlow. The + integration is based on tf.data from TensorFlow side and Binary Client + Protocol from Apache Ignite side. It allows to use Apache Ignite as a + datasource for neural network training, inference and all other + computations supported by TensorFlow. Ignite Dataset is based on Apache + Ignite Binary Client Protocol: + https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. 
+ +@@IgniteDataset +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \ +import IgniteDataset + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + "IgniteDataset", +] + +remove_undocumented(__name__) diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc new file mode 100644 index 0000000000..bf0ef8766e --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -0,0 +1,304 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_binary_object_parser.h" + +namespace ignite { + +tensorflow::Status BinaryObjectParser::Parse( + uint8_t*& ptr, std::vector& out_tensors, + std::vector& types) { + uint8_t object_type_id = *ptr; + ptr += 1; + + switch (object_type_id) { + case BYTE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT8, {}); + tensor.scalar()() = *((uint8_t*)ptr); + ptr += 1; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case SHORT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT16, {}); + tensor.scalar()() = *((int16_t*)ptr); + ptr += 2; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case INT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT32, {}); + tensor.scalar()() = *((int32_t*)ptr); + ptr += 4; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case LONG: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, {}); + tensor.scalar()() = *((int64_t*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case FLOAT: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_FLOAT, {}); + tensor.scalar()() = *((float*)ptr); + ptr += 4; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DOUBLE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_DOUBLE, {}); + tensor.scalar()() = *((double*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case UCHAR: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT16, {}); + tensor.scalar()() = *((uint16_t*)ptr); + ptr += 2; + out_tensors.emplace_back(std::move(tensor)); + break; + } + case BOOL: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_BOOL, {}); + tensor.scalar()() = *((bool*)ptr); + ptr += 1; + 
out_tensors.emplace_back(std::move(tensor)); + + break; + } + case STRING: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_STRING, {}); + tensor.scalar()() = std::string((char*)ptr, length); + ptr += length; + out_tensors.emplace_back(std::move(tensor)); + + break; + } + case DATE: { + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, {}); + tensor.scalar()() = *((int64_t*)ptr); + ptr += 8; + out_tensors.emplace_back(std::move(tensor)); + + break; + } + case BYTE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT8, + tensorflow::TensorShape({length})); + + uint8_t* arr = (uint8_t*)ptr; + ptr += length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case SHORT_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT16, + tensorflow::TensorShape({length})); + + int16_t* arr = (int16_t*)ptr; + ptr += length * 2; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case INT_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT32, + tensorflow::TensorShape({length})); + + int32_t* arr = (int32_t*)ptr; + ptr += length * 4; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case LONG_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_INT64, + tensorflow::TensorShape({length})); + + int64_t* arr = (int64_t*)ptr; + ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case FLOAT_ARR: { + 
int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_FLOAT, + tensorflow::TensorShape({length})); + + float* arr = (float*)ptr; + ptr += 4 * length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DOUBLE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_DOUBLE, + tensorflow::TensorShape({length})); + + double* arr = (double*)ptr; + ptr += 8 * length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case UCHAR_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_UINT16, + tensorflow::TensorShape({length})); + + uint16_t* arr = (uint16_t*)ptr; + ptr += length * 2; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case BOOL_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_BOOL, + tensorflow::TensorShape({length})); + + bool* arr = (bool*)ptr; + ptr += length; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case STRING_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + tensorflow::DT_STRING, + tensorflow::TensorShape({length})); + + for (int32_t i = 0; i < length; i++) { + int32_t str_length = *((int32_t*)ptr); + ptr += 4; + const int8_t* str = (const int8_t*)ptr; + ptr += str_length; + tensor.vec()(i) = std::string((char*)str, str_length); + } + + out_tensors.emplace_back(std::move(tensor)); + break; + } + case DATE_ARR: { + int32_t length = *((int32_t*)ptr); + ptr += 4; + tensorflow::Tensor tensor(tensorflow::cpu_allocator(), + 
tensorflow::DT_INT64, + tensorflow::TensorShape({length})); + int64_t* arr = (int64_t*)ptr; + ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors.emplace_back(std::move(tensor)); + break; + } + case WRAPPED_OBJ: { + int32_t byte_arr_size = *((int32_t*)ptr); + ptr += 4; + + tensorflow::Status status = Parse(ptr, out_tensors, types); + if (!status.ok()) return status; + + int32_t offset = *((int32_t*)ptr); + ptr += 4; + + break; + } + case COMPLEX_OBJ: { + uint8_t version = *ptr; + ptr += 1; + int16_t flags = *((int16_t*)ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 + ptr += 2; + int32_t type_id = *((int32_t*)ptr); + ptr += 4; + int32_t hash_code = *((int32_t*)ptr); + ptr += 4; + int32_t length = *((int32_t*)ptr); + ptr += 4; + int32_t schema_id = *((int32_t*)ptr); + ptr += 4; + int32_t schema_offset = *((int32_t*)ptr); + ptr += 4; + + uint8_t* end = ptr + schema_offset - 24; + int32_t i = 0; + while (ptr < end) { + i++; + tensorflow::Status status = Parse(ptr, out_tensors, types); + if (!status.ok()) return status; + } + + ptr += (length - schema_offset); + + break; + } + default: { + return tensorflow::errors::Internal("Unknowd binary type (type id ", + (int)object_type_id, ")"); + } + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h new file mode 100644 index 0000000000..1e845cbc56 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -0,0 +1,54 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/lib/core/status.h" + +namespace ignite { + +class BinaryObjectParser { + public: + tensorflow::Status Parse(uint8_t*& ptr, + std::vector& out_tensors, + std::vector& types); +}; + +enum ObjectType { + BYTE = 1, + SHORT = 2, + INT = 3, + LONG = 4, + FLOAT = 5, + DOUBLE = 6, + UCHAR = 7, + BOOL = 8, + STRING = 9, + DATE = 11, + BYTE_ARR = 12, + SHORT_ARR = 13, + INT_ARR = 14, + LONG_ARR = 15, + FLOAT_ARR = 16, + DOUBLE_ARR = 17, + UCHAR_ARR = 18, + BOOL_ARR = 19, + STRING_ARR = 20, + DATE_ARR = 22, + WRAPPED_OBJ = 27, + COMPLEX_OBJ = 103 +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc new file mode 100644 index 0000000000..5a8eddb944 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_client.cc @@ -0,0 +1,55 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +namespace ignite { + +tensorflow::Status Client::ReadByte(uint8_t& data) { + return ReadData((uint8_t*)&data, 1); +} + +tensorflow::Status Client::ReadShort(int16_t& data) { + return ReadData((uint8_t*)&data, 2); +} + +tensorflow::Status Client::ReadInt(int32_t& data) { + return ReadData((uint8_t*)&data, 4); +} + +tensorflow::Status Client::ReadLong(int64_t& data) { + return ReadData((uint8_t*)&data, 8); +} + +tensorflow::Status Client::WriteByte(uint8_t data) { + return WriteData((uint8_t*)&data, 1); +} + +tensorflow::Status Client::WriteShort(int16_t data) { + return WriteData((uint8_t*)&data, 2); +} + +tensorflow::Status Client::WriteInt(int32_t data) { + return WriteData((uint8_t*)&data, 4); +} + +tensorflow::Status Client::WriteLong(int64_t data) { + return WriteData((uint8_t*)&data, 8); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h new file mode 100644 index 0000000000..64e28d75f0 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -0,0 +1,40 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/lib/core/status.h" + +namespace ignite { + +class Client { + public: + virtual tensorflow::Status Connect() = 0; + virtual tensorflow::Status Disconnect() = 0; + virtual bool IsConnected() = 0; + virtual int GetSocketDescriptor() = 0; + + virtual tensorflow::Status ReadByte(uint8_t& data); + virtual tensorflow::Status ReadShort(int16_t& data); + virtual tensorflow::Status ReadInt(int32_t& data); + virtual tensorflow::Status ReadLong(int64_t& data); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0; + + virtual tensorflow::Status WriteByte(uint8_t data); + virtual tensorflow::Status WriteShort(int16_t data); + virtual tensorflow::Status WriteInt(int32_t data); + virtual tensorflow::Status WriteLong(int64_t data); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0; +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc new file mode 100644 index 0000000000..a9bf26955b --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -0,0 +1,123 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset_iterator.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, + std::string cache_name, std::string host, + tensorflow::int32 port, bool local, + tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, + std::string password, std::string certfile, + std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation) + : DatasetBase(tensorflow::DatasetContext(ctx)), + cache_name(cache_name), + host(host), + port(port), + local(local), + part(part), + page_size(page_size), + username(username), + password(password), + certfile(certfile), + keyfile(keyfile), + cert_password(cert_password), + schema(schema), + permutation(permutation) { + SchemaToTypes(); // fills dtypes from the schema member initialized above + SchemaToShapes(); // fills shapes from the same schema + + LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name + << "', host='" << host << "', port=" << port << ", local=" << local + << ", part=" << part << ", page_size=" << page_size + << ", username='" << username << "', certfile='" << certfile + << "', keyfile='" << keyfile << "']"; +} + +IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } + +std::unique_ptr IgniteDataset::MakeIteratorInternal( + const tensorflow::string& prefix) const { + return std::unique_ptr(new IgniteDatasetIterator( + {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host, + this->port, this->cache_name, this->local, this->part, this->page_size, + this->username, this->password, this->certfile, this->keyfile, + this->cert_password, this->schema, this->permutation)); +} + +const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const { + return dtypes; +} + +const std::vector& +IgniteDataset::output_shapes() const { + return shapes; +} + +tensorflow::string IgniteDataset::DebugString() const { + return
"IgniteDatasetOp::Dataset"; +} + +tensorflow::Status IgniteDataset::AsGraphDefInternal( + tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, + tensorflow::Node** output) const { + return tensorflow::errors::Unimplemented( + "IgniteDataset does not support 'AsGraphDefInternal'"); +} + +void IgniteDataset::SchemaToTypes() { // maps each Ignite type id in schema to a TensorFlow dtype + for (auto e : schema) { + if (e == BYTE || e == BYTE_ARR) { + dtypes.push_back(tensorflow::DT_UINT8); + } else if (e == SHORT || e == SHORT_ARR) { + dtypes.push_back(tensorflow::DT_INT16); + } else if (e == INT || e == INT_ARR) { + dtypes.push_back(tensorflow::DT_INT32); + } else if (e == LONG || e == LONG_ARR) { + dtypes.push_back(tensorflow::DT_INT64); + } else if (e == FLOAT || e == FLOAT_ARR) { + dtypes.push_back(tensorflow::DT_FLOAT); + } else if (e == DOUBLE || e == DOUBLE_ARR) { + dtypes.push_back(tensorflow::DT_DOUBLE); + } else if (e == UCHAR || e == UCHAR_ARR) { + dtypes.push_back(tensorflow::DT_UINT16); // BinaryObjectParser::Parse builds DT_UINT16 tensors for UCHAR + } else if (e == BOOL || e == BOOL_ARR) { + dtypes.push_back(tensorflow::DT_BOOL); + } else if (e == STRING || e == STRING_ARR) { + dtypes.push_back(tensorflow::DT_STRING); + } else { + LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; // entry is skipped, nothing is pushed + } + } +} + +void IgniteDataset::SchemaToShapes() { // scalar ids get shape {}, array ids get shape {-1} + for (auto e : schema) { + if (e >= 1 && e < 10) { // BYTE(1)..STRING(9) + shapes.push_back(tensorflow::PartialTensorShape({})); + } else if (e >= 12 && e < 21) { // BYTE_ARR(12)..STRING_ARR(20) + shapes.push_back(tensorflow::PartialTensorShape({-1})); + } else { + LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; // entry is skipped, nothing is pushed + } + } +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h new file mode 100644 index 0000000000..2120dfd342 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/dataset.h" + +namespace ignite { + +class IgniteDataset : public tensorflow::DatasetBase { + public: + IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name, + std::string host, tensorflow::int32 port, bool local, + tensorflow::int32 part, tensorflow::int32 page_size, + std::string username, std::string password, + std::string certfile, std::string keyfile, + std::string cert_password, + std::vector schema, + std::vector permutation); + ~IgniteDataset(); + std::unique_ptr MakeIteratorInternal( + const tensorflow::string& prefix) const override; + const tensorflow::DataTypeVector& output_dtypes() const override; + const std::vector& output_shapes() + const override; + tensorflow::string DebugString() const override; + + protected: + tensorflow::Status AsGraphDefInternal( + tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, + tensorflow::Node** output) const override; + + private: + const std::string cache_name; + const std::string host; + const tensorflow::int32 port; + const bool local; + const tensorflow::int32 part; + const tensorflow::int32 page_size; + const std::string username; + const std::string password; + const std::string certfile; + const std::string keyfile; + const std::string cert_password; + const std::vector schema; + const std::vector permutation; + + 
tensorflow::DataTypeVector dtypes; + std::vector shapes; + + void SchemaToTypes(); + void SchemaToShapes(); +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc new file mode 100644 index 0000000000..03cc3c1291 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -0,0 +1,447 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset_iterator.h" + +#include "ignite_plain_client.h" +#include "ignite_ssl_wrapper.h" +#include "tensorflow/core/platform/logging.h" + +#include +#include + +namespace ignite { + +#define CHECK_STATUS(status) \ + if (!status.ok()) return status; + +IgniteDatasetIterator::IgniteDatasetIterator( + const Params& params, std::string host, tensorflow::int32 port, + std::string cache_name, bool local, tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, std::string password, + std::string certfile, std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation) + : tensorflow::DatasetIterator(params), + cache_name(cache_name), + local(local), + part(part), + page_size(page_size), + username(username), + password(password), + schema(schema), + permutation(permutation), + remainder(-1), + cursor_id(-1), + last_page(false) { + Client* p_client = new PlainClient(host, port); + + if (certfile.empty()) + client = std::unique_ptr(p_client); + else + client = std::unique_ptr(new SslWrapper( + std::unique_ptr(p_client), certfile, keyfile, cert_password)); + + LOG(INFO) << "Ignite Dataset Iterator created"; +} + +IgniteDatasetIterator::~IgniteDatasetIterator() { + tensorflow::Status status = CloseConnection(); + if (!status.ok()) LOG(ERROR) << status.ToString(); + + LOG(INFO) << "Ignite Dataset Iterator destroyed"; +} + +tensorflow::Status IgniteDatasetIterator::EstablishConnection() { + if (!client->IsConnected()) { + tensorflow::Status status = client->Connect(); + if (!status.ok()) return status; + + status = Handshake(); + if (!status.ok()) { + tensorflow::Status disconnect_status = client->Disconnect(); + if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); + + return status; + } + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::CloseConnection() { + if 
(cursor_id != -1 && !last_page) { + tensorflow::Status conn_status = EstablishConnection(); + if (!conn_status.ok()) return conn_status; + + CHECK_STATUS(client->WriteInt(18)); // Message length + CHECK_STATUS( + client->WriteShort(close_connection_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteLong(cursor_id)); // Resource ID + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + if (res_len < 12) + return tensorflow::errors::Internal( + "Close Resource Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + if (status != 0) { + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Close Resource Error [status=", + status, ", message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Close Resource Error [status=", + status, "]"); + } + + LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + + cursor_id = -1; + + return client->Disconnect(); + } else { + LOG(INFO) << "Query Cursor " << cursor_id << " is already closed"; + } + + return client->IsConnected() ? 
client->Disconnect() + : tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::GetNextInternal( + tensorflow::IteratorContext* ctx, + std::vector* out_tensors, bool* end_of_sequence) { + if (remainder == 0 && last_page) { + LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + + cursor_id = -1; + *end_of_sequence = true; + return tensorflow::Status::OK(); + } else { + tensorflow::Status status = EstablishConnection(); + if (!status.ok()) return status; + + if (remainder == -1 || remainder == 0) { + tensorflow::Status status = + remainder == -1 ? ScanQuery() : LoadNextPage(); + if (!status.ok()) return status; + } + + uint8_t* initial_ptr = ptr; + std::vector types; + std::vector tensors; + + status = parser.Parse(ptr, tensors, types); // Parse key + if (!status.ok()) return status; + + status = parser.Parse(ptr, tensors, types); // Parse val + if (!status.ok()) return status; + + remainder -= (ptr - initial_ptr); + + out_tensors->resize(tensors.size()); + for (int32_t i = 0; i < tensors.size(); i++) + (*out_tensors)[permutation[i]] = std::move(tensors[i]); + + *end_of_sequence = false; + return tensorflow::Status::OK(); + } + + *end_of_sequence = true; + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::SaveInternal( + tensorflow::IteratorStateWriter* writer) { + return tensorflow::errors::Unimplemented( + "Iterator for IgniteDataset does not support 'SaveInternal'"); +} + +tensorflow::Status IgniteDatasetIterator::RestoreInternal( + tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) { + return tensorflow::errors::Unimplemented( + "Iterator for IgniteDataset does not support 'RestoreInternal')"); +} + +tensorflow::Status IgniteDatasetIterator::Handshake() { + int32_t msg_len = 8; + + if (username.empty()) + msg_len += 1; + else + msg_len += 5 + username.length(); + + if (password.empty()) + msg_len += 1; + else + msg_len += 5 + password.length(); + + 
CHECK_STATUS(client->WriteInt(msg_len)); + CHECK_STATUS(client->WriteByte(1)); + CHECK_STATUS(client->WriteShort(protocol_major_version)); + CHECK_STATUS(client->WriteShort(protocol_minor_version)); + CHECK_STATUS(client->WriteShort(protocol_patch_version)); + CHECK_STATUS(client->WriteByte(2)); + if (username.empty()) { + CHECK_STATUS(client->WriteByte(null_val)); + } else { + CHECK_STATUS(client->WriteByte(string_val)); + CHECK_STATUS(client->WriteInt(username.length())); + CHECK_STATUS( + client->WriteData((uint8_t*)username.c_str(), username.length())); + } + + if (password.empty()) { + CHECK_STATUS(client->WriteByte(null_val)); + } else { + CHECK_STATUS(client->WriteByte(string_val)); + CHECK_STATUS(client->WriteInt(password.length())); + CHECK_STATUS( + client->WriteData((uint8_t*)password.c_str(), password.length())); + } + + int32_t handshake_res_len; + CHECK_STATUS(client->ReadInt(handshake_res_len)); + uint8_t handshake_res; + CHECK_STATUS(client->ReadByte(handshake_res)); + + LOG(INFO) << "Handshake length " << handshake_res_len << ", res " + << (int16_t)handshake_res; + + if (handshake_res != 1) { + int16_t serv_ver_major; + CHECK_STATUS(client->ReadShort(serv_ver_major)); + int16_t serv_ver_minor; + CHECK_STATUS(client->ReadShort(serv_ver_minor)); + int16_t serv_ver_patch; + CHECK_STATUS(client->ReadShort(serv_ver_patch)); + uint8_t header; + CHECK_STATUS(client->ReadByte(header)); + + if (header == string_val) { + int32_t length; + CHECK_STATUS(client->ReadInt(length)); + uint8_t* err_msg_c = new uint8_t[length]; + CHECK_STATUS(client->ReadData(err_msg_c, length)); + std::string err_msg((char*)err_msg_c, length); + delete[] err_msg_c; + + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, + ", message='", err_msg, "']"); + } else if (header == null_val) { + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", 
version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + } else { + return tensorflow::errors::Internal( + "Handshake Error [result=", handshake_res, ", version=", + serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + } + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::ScanQuery() { + CHECK_STATUS(client->WriteInt(25)); // Message length + CHECK_STATUS(client->WriteShort(scan_query_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name))); // Cache name + CHECK_STATUS(client->WriteByte(0)); // Flags + CHECK_STATUS(client->WriteByte(null_val)); // Filter object + CHECK_STATUS(client->WriteInt(page_size)); // Cursor page size + CHECK_STATUS(client->WriteInt(part)); // Partition to query + CHECK_STATUS(client->WriteByte(local)); // Local flag + + int64_t wait_start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + + int64_t wait_stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; + + if (res_len < 12) + return tensorflow::errors::Internal("Scan Query Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + + if (status != 0) { + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Scan Query Error [status=", status, + ", 
message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Scan Query Error [status=", status, + "]"); + } + + CHECK_STATUS(client->ReadLong(cursor_id)); + + LOG(INFO) << "Query Cursor " << cursor_id << " is opened"; + + int32_t row_cnt; + CHECK_STATUS(client->ReadInt(row_cnt)); + + remainder = res_len - 25; + page = std::unique_ptr(new uint8_t[remainder]); + ptr = page.get(); + + int64_t start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + CHECK_STATUS(client->ReadData(ptr, remainder)); + + int64_t stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + ; + + double size_in_mb = 1.0 * remainder / 1024 / 1024; + double time_in_s = 1.0 * (stop - start) / 1000; + LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 + << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; + + uint8_t last_page_b; + CHECK_STATUS(client->ReadByte(last_page_b)); + + last_page = !last_page_b; + + return tensorflow::Status::OK(); +} + +tensorflow::Status IgniteDatasetIterator::LoadNextPage() { + CHECK_STATUS(client->WriteInt(18)); // Message length + CHECK_STATUS(client->WriteShort(load_next_page_opcode)); // Operation code + CHECK_STATUS(client->WriteLong(0)); // Request ID + CHECK_STATUS(client->WriteLong(cursor_id)); // Cursor ID + + int64_t wait_start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + int32_t res_len; + CHECK_STATUS(client->ReadInt(res_len)); + + int64_t wait_stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; + + if (res_len < 12) + return tensorflow::errors::Internal("Load Next Page Response is corrupted"); + + int64_t req_id; + CHECK_STATUS(client->ReadLong(req_id)); + + int32_t status; + CHECK_STATUS(client->ReadInt(status)); + + if (status != 0) 
{ + uint8_t err_msg_header; + CHECK_STATUS(client->ReadByte(err_msg_header)); + + if (err_msg_header == string_val) { + int32_t err_msg_length; + CHECK_STATUS(client->ReadInt(err_msg_length)); + + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + std::string err_msg((char*)err_msg_c, err_msg_length); + delete[] err_msg_c; + + return tensorflow::errors::Internal("Load Next Page Error [status=", + status, ", message=", err_msg, "]"); + } + return tensorflow::errors::Internal("Load Next Page Error [status=", status, + "]"); + } + + int32_t row_cnt; + CHECK_STATUS(client->ReadInt(row_cnt)); + + remainder = res_len - 17; + page = std::unique_ptr(new uint8_t[remainder]); + ptr = page.get(); + + int64_t start = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + + CHECK_STATUS(client->ReadData(ptr, remainder)); + + int64_t stop = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + ; + + double size_in_mb = 1.0 * remainder / 1024 / 1024; + double time_in_s = 1.0 * (stop - start) / 1000; + LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 + << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; + + uint8_t last_page_b; + CHECK_STATUS(client->ReadByte(last_page_b)); + + last_page = !last_page_b; + + return tensorflow::Status::OK(); +} + +int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { + int32_t h = 0; + for (char& c : str) { + h = 31 * h + c; + } + return h; +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h new file mode 100644 index 0000000000..d1df4527f9 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -0,0 +1,87 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_binary_object_parser.h" +#include "ignite_dataset.h" + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +namespace ignite { + +class IgniteDatasetIterator + : public tensorflow::DatasetIterator { + public: + IgniteDatasetIterator(const Params& params, std::string host, + tensorflow::int32 port, std::string cache_name, + bool local, tensorflow::int32 part, + tensorflow::int32 page_size, std::string username, + std::string password, std::string certfile, + std::string keyfile, std::string cert_password, + std::vector schema, + std::vector permutation); + ~IgniteDatasetIterator(); + tensorflow::Status GetNextInternal( + tensorflow::IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override; + + protected: + tensorflow::Status SaveInternal( + tensorflow::IteratorStateWriter* writer) override; + tensorflow::Status RestoreInternal( + tensorflow::IteratorContext* ctx, + tensorflow::IteratorStateReader* reader) override; + + private: + std::unique_ptr client; + BinaryObjectParser parser; + + const std::string cache_name; + const bool local; + const tensorflow::int32 part; + const tensorflow::int32 page_size; + const std::string username; + const std::string password; + const std::vector schema; + const std::vector permutation; + + int32_t remainder; + int64_t cursor_id; + bool 
last_page; + + std::unique_ptr page; + uint8_t* ptr; + + tensorflow::Status EstablishConnection(); + tensorflow::Status CloseConnection(); + tensorflow::Status Handshake(); + tensorflow::Status ScanQuery(); + tensorflow::Status LoadNextPage(); + int32_t JavaHashCode(std::string str); +}; + +constexpr uint8_t null_val = 101; +constexpr uint8_t string_val = 9; +constexpr uint8_t protocol_major_version = 1; +constexpr uint8_t protocol_minor_version = 1; +constexpr uint8_t protocol_patch_version = 0; +constexpr int16_t scan_query_opcode = 2000; +constexpr int16_t load_next_page_opcode = 2001; +constexpr int16_t close_connection_opcode = 0; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc new file mode 100644 index 0000000000..543b5e4afc --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -0,0 +1,145 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "ignite_dataset.h" +#include +#include "tensorflow/core/framework/dataset.h" + +namespace tensorflow { + +class IgniteDatasetOp : public DatasetOpKernel { + public: + using DatasetOpKernel::DatasetOpKernel; + + void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { + std::string cache_name = ""; + std::string host = ""; + int32 port = -1; + bool local = false; + int32 part = -1; + int32 page_size = -1; + std::string username = ""; + std::string password = ""; + std::string certfile = ""; + std::string keyfile = ""; + std::string cert_password = ""; + + const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME"); + const char* env_host = std::getenv("IGNITE_DATASET_HOST"); + const char* env_port = std::getenv("IGNITE_DATASET_PORT"); + const char* env_local = std::getenv("IGNITE_DATASET_LOCAL"); + const char* env_part = std::getenv("IGNITE_DATASET_PART"); + const char* env_page_size = std::getenv("IGNITE_DATASET_PAGE_SIZE"); + const char* env_username = std::getenv("IGNITE_DATASET_USERNAME"); + const char* env_password = std::getenv("IGNITE_DATASET_PASSWORD"); + const char* env_certfile = std::getenv("IGNITE_DATASET_CERTFILE"); + const char* env_keyfile = std::getenv("IGNITE_DATASET_KEYFILE"); + const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD"); + + if (env_cache_name) + cache_name = std::string(env_cache_name); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cache_name", + &cache_name)); + + if (env_host) + host = std::string(env_host); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); + + if (env_port) + port = atoi(env_port); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "port", &port)); + + if (env_local) + local = true; + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "local", &local)); + + if (env_part) + part = atoi(env_part); + else + OP_REQUIRES_OK(ctx, 
ParseScalarArgument(ctx, "part", &part)); + + if (env_page_size) + page_size = atoi(env_page_size); + else + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "page_size", &page_size)); + + if (env_username) + username = std::string(env_username); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "username", &username)); + + if (env_password) + password = std::string(env_password); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "password", &password)); + + if (env_certfile) + certfile = std::string(env_certfile); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "certfile", &certfile)); + + if (env_keyfile) + keyfile = std::string(env_keyfile); + else + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "keyfile", &keyfile)); + + if (env_cert_password) + cert_password = std::string(env_cert_password); + else + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", + &cert_password)); + + const Tensor* schema_tensor; + OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); + OP_REQUIRES(ctx, schema_tensor->dims() == 1, + errors::InvalidArgument("`schema` must be a vector.")); + + std::vector schema; + schema.reserve(schema_tensor->NumElements()); + for (int i = 0; i < schema_tensor->NumElements(); i++) { + schema.push_back(schema_tensor->flat()(i)); + } + + const Tensor* permutation_tensor; + OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor)); + OP_REQUIRES(ctx, schema_tensor->dims() == 1, + errors::InvalidArgument("`permutation` must be a vector.")); + + std::vector permutation; + permutation.reserve(permutation_tensor->NumElements()); + for (int i = 0; i < permutation_tensor->NumElements(); i++) { + permutation.push_back(permutation_tensor->flat()(i)); + } + + *output = new ignite::IgniteDataset( + ctx, cache_name, host, port, local, part, page_size, username, password, + certfile, keyfile, cert_password, std::move(schema), + std::move(permutation)); + } +}; + 
+REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU), + IgniteDatasetOp); + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h new file mode 100644 index 0000000000..5491af68d6 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +#include + +namespace ignite { + +class PlainClient : public Client { + public: + PlainClient(std::string host, int port); + ~PlainClient(); + + virtual tensorflow::Status Connect(); + virtual tensorflow::Status Disconnect(); + virtual bool IsConnected(); + virtual int GetSocketDescriptor(); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + + private: + std::string host; + int port; + int sock; +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc new file mode 100644 index 0000000000..dbfa4f8786 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -0,0 +1,132 @@ +/* Copyright 2018 The TensorFlow 
Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_plain_client.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +PlainClient::PlainClient(std::string host, int port) + : host(host), port(port), sock(-1) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +tensorflow::Status PlainClient::Connect() { + if (sock == -1) { + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock == -1) + return tensorflow::errors::Internal("Failed to create socket"); + } + + sockaddr_in server; + + server.sin_addr.s_addr = inet_addr(host.c_str()); + if (server.sin_addr.s_addr == -1) { + hostent* he; + in_addr** addr_list; + + if ((he = gethostbyname(host.c_str())) == NULL) + return tensorflow::errors::Internal("Failed to resolve hostname \"", host, + "\""); + + addr_list = (in_addr**)he->h_addr_list; + if (addr_list[0] != NULL) server.sin_addr = *addr_list[0]; + } + + server.sin_family = AF_INET; + server.sin_port = htons(port); + + if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0) + return tensorflow::errors::Internal("Failed to connect to \"", host, ":", + port, "\""); + + LOG(INFO) << "Connection to \"" << 
host << ":" << port << "\" established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::Disconnect() { + int close_res = close(sock); + sock = -1; + + LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed"; + + return close_res == 0 ? tensorflow::Status::OK() + : tensorflow::errors::Internal( + "Failed to correctly close connection"); +} + +bool PlainClient::IsConnected() { return sock != -1; } + +int PlainClient::GetSocketDescriptor() { return sock; } + +tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = recv(sock, buf, length - recieved, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from socket: ", res, ", ", + std::string(strerror(errno))); + + if (res == 0) + return tensorflow::errors::Internal("Server closed connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock, buf, length - sent, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res, ", ", + std::string(strerror(errno))); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc new file mode 100644 index 0000000000..f78c9b3627 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -0,0 +1,143 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_plain_client.h" + +#define WIN32_LEAN_AND_MEAN +#include +#include +#include + +#pragma comment(lib, "Ws2_32.lib") +#pragma comment(lib, "Mswsock.lib") +#pragma comment(lib, "AdvApi32.lib") + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +namespace ignite { + +PlainClient::PlainClient(std::string host, int port) + : host(host), port(port), sock(INVALID_SOCKET) {} + +PlainClient::~PlainClient() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } +} + +tensorflow::Status PlainClient::Connect() { + WSADATA wsaData; + addrinfo *result = NULL, *ptr = NULL, hints; + + int res = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (res != 0) + return tensorflow::errors::Internal("WSAStartup failed with error: ", res); + + ZeroMemory(&hints, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_protocol = IPPROTO_TCP; + + res = + getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result); + if (res != 0) + return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res); + + for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { + sock = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); + if (sock == INVALID_SOCKET) { + WSACleanup(); + return tensorflow::errors::Internal("Socket failed with error: ", + WSAGetLastError()); + } + + res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen); + if (res 
== SOCKET_ERROR) { + closesocket(sock); + sock = INVALID_SOCKET; + continue; + } + + break; + } + + freeaddrinfo(result); + + if (sock == INVALID_SOCKET) { + WSACleanup(); + return tensorflow::errors::Internal("Unable to connect to server"); + } + + LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::Disconnect() { + int res = shutdown(sock, SD_SEND); + closesocket(sock); + WSACleanup(); + + if (res == SOCKET_ERROR) + return tensorflow::errors::Internal("Shutdown failed with error: ", + WSAGetLastError()); + else + return tensorflow::Status::OK(); +} + +bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; } + +int PlainClient::GetSocketDescriptor() { return sock; } + +tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = recv(sock, buf, length - recieved, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from socket: ", res); + + if (res == 0) + return tensorflow::errors::Internal("Server closed connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = send(sock, buf, length - sent, 0); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc new file mode 100644 index 0000000000..a1101b91f3 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -0,0 +1,149 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "ignite_ssl_wrapper.h" + +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" + +#include +#include + +namespace ignite { + +static int PasswordCb(char *buf, int size, int rwflag, void *password) { + strncpy(buf, (char *)(password), size); + buf[size - 1] = '\0'; + return (strlen(buf)); +} + +SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, + std::string keyfile, std::string cert_password) + : client(client), + certfile(certfile), + keyfile(keyfile), + cert_password(cert_password), + ctx(NULL) {} + +SslWrapper::~SslWrapper() { + if (IsConnected()) { + tensorflow::Status status = Disconnect(); + if (!status.ok()) LOG(WARNING) << status.ToString(); + } + + if (ctx != NULL) { + SSL_CTX_free(ctx); + ctx = NULL; + } +} + +tensorflow::Status SslWrapper::InitSslContext() { + OpenSSL_add_all_algorithms(); + SSL_load_error_strings(); + + ctx = SSL_CTX_new(SSLv23_method()); + if (ctx == NULL) + return tensorflow::errors::Internal("Couldn't create SSL context"); + + SSL_CTX_set_default_passwd_cb(ctx, PasswordCb); + SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str()); + + if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1) + return tensorflow::errors::Internal( + "Couldn't load cetificate chain (file '", certfile, "')"); + + std::string private_key_file = 
keyfile.empty() ? certfile : keyfile; + if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(), + SSL_FILETYPE_PEM) != 1) + return tensorflow::errors::Internal("Couldn't load private key (file '", + private_key_file, "')"); + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::Connect() { + tensorflow::Status status; + + if (ctx == NULL) { + status = InitSslContext(); + if (!status.ok()) return status; + } + + ssl = SSL_new(ctx); + if (ssl == NULL) + return tensorflow::errors::Internal("Failed to establish SSL connection"); + + status = client->Connect(); + if (!status.ok()) return status; + + SSL_set_fd(ssl, client->GetSocketDescriptor()); + if (SSL_connect(ssl) != 1) + return tensorflow::errors::Internal("Failed to establish SSL connection"); + + LOG(INFO) << "SSL connection established"; + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::Disconnect() { + SSL_free(ssl); + + LOG(INFO) << "SSL connection closed"; + + return client->Disconnect(); +} + +bool SslWrapper::IsConnected() { return client->IsConnected(); } + +int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); } + +tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { + int recieved = 0; + + while (recieved < length) { + int res = SSL_read(ssl, buf, length - recieved); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while reading from SSL socket: ", res); + + if (res == 0) + return tensorflow::errors::Internal("Server closed SSL connection"); + + recieved += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { + int sent = 0; + + while (sent < length) { + int res = SSL_write(ssl, buf, length - sent); + + if (res < 0) + return tensorflow::errors::Internal( + "Error occured while writing into socket: ", res); + + sent += res; + buf += res; + } + + return tensorflow::Status::OK(); +} + +} // namespace ignite 
diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h new file mode 100644 index 0000000000..e0c2a242dc --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef IGNITE_CLIENT_H +#define IGNITE_CLIENT_H +#include "ignite_client.h" +#endif + +#include +#include + +namespace ignite { + +class SslWrapper : public Client { + public: + SslWrapper(std::shared_ptr client, std::string certfile, + std::string keyfile, std::string cert_password); + ~SslWrapper(); + + virtual tensorflow::Status Connect(); + virtual tensorflow::Status Disconnect(); + virtual bool IsConnected(); + virtual int GetSocketDescriptor(); + virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); + virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + + private: + std::shared_ptr client; + std::string certfile; + std::string keyfile; + std::string cert_password; + SSL_CTX* ctx; + SSL* ssl; + tensorflow::Status InitSslContext(); +}; + +} // namespace ignite diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc new file mode 100644 index 0000000000..17494d1cfd --- /dev/null +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -0,0 +1,64 @@ +/* 
Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("IgniteDataset") + .Input("cache_name: string") + .Input("host: string") + .Input("port: int32") + .Input("local: bool") + .Input("part: int32") + .Input("page_size: int32") + .Input("username: string") + .Input("password: string") + .Input("certfile: string") + .Input("keyfile: string") + .Input("cert_password: string") + .Input("schema: int32") + .Input("permutation: int32") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Apache Ignite is a memory-centric distributed database, caching, and processing +platform for transactional, analytical, and streaming workloads, delivering +in-memory speeds at petabyte scale. This contrib package contains an +integration between Apache Ignite and TensorFlow. The integration is based on +tf.data from TensorFlow side and Binary Client Protocol from Apache Ignite side. +It allows to use Apache Ignite as a datasource for neural network training, +inference and all other computations supported by TensorFlow. Ignite Dataset +is based on Apache Ignite Binary Client Protocol. + +cache_name: Ignite Cache Name. 
+host: Ignite Thin Client Host. +port: Ignite Thin Client Port. +local: Local flag that defines that data should be fetched from local host only. +part: Partition data should be fetched from. +page_size: Page size for Ignite Thin Client. +username: Username to authenticate via Ignite Thin Client. +password: Password to authenticate via Ignite Thin Client. +certfile: SSL certificate to establish SSL connection. +keyfile: Private key file to establish SSL connection. +cert_password: SSL certificate password to establish SSL connection. +schema: Internal structure that defines schema of cache objects. +permutation: Internal structure that defines permutation of cache objects. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py new file mode 100644 index 0000000000..6fa073957a --- /dev/null +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -0,0 +1,763 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +"""Ignite Dataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import socket +import struct +import ssl +import abc + +from tensorflow.contrib.ignite.python.ops import ignite_op_loader # pylint: disable=unused-import +from tensorflow.contrib.ignite.python.ops import gen_dataset_ops +from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape + +class Readable(): + """Readable abstract class that exposes methods to do reading-related + operations. + """ + + @abc.abstractmethod + def __init__(self): + pass + + def read_byte(self): + """Reads and returnes byte.""" + return self.__read("b", 1) + + def read_short(self): + """Reads and returns short (2 bytes, little-endian).""" + return self.__read("h", 2) + + def read_int(self): + """Reads and returns int (4 bytes, little-endian).""" + return self.__read("i", 4) + + def read_long(self): + """Reads and returns long (8 bytes, little-endian).""" + return self.__read("q", 8) + + def skip(self, length): + """Skips the specified number of bytes.""" + self.read_data(length) + + @abc.abstractmethod + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + return None + + def __read(self, data_type, length): + """Reads, unpacks and returns specified type (little-endian).""" + buffer = self.read_data(length) + return struct.unpack("<" + data_type, buffer)[0] + +class DataBuffer(Readable): + """DataBuffer class that exposes methods to read data from a byte buffer.""" + + def __init__(self, buffer): + """Constructs a new instance of DataBuffer based on the specified byte + buffer. + + Args: + buffer: Buffer to be read. 
+ """ + Readable.__init__(self) + self.buffer = buffer + self.ptr = 0 + + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + data_buffer = self.buffer[self.ptr:][:length] + self.ptr += length + return data_buffer + +class TcpClient(Readable): + """TcpClient class that exposes methods to read data from a socket.""" + + def __init__(self, host, port, certfile=None, keyfile=None, password=None): + """Constructs a new instance of TcpClient based on the specified host + and port. + + Args: + host: Host to be connected. + port: Port to be connected. + certfile: File in PEM format containing the certificate as well as any + number of CA certificates needed to establish the certificate’s + authenticity. + keyfile: File containing the private key (otherwise the private key + will be taken from certfile as well). + password: Password to be used if the private key is encrypted and a + password is necessary. + """ + Readable.__init__(self) + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + + if certfile is not None: + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.load_cert_chain(certfile, keyfile, password) + self.sock = context.wrap_socket(self.sock) + else: + if keyfile is not None: + raise Exception("SSL is disabled, keyfile must not be specified \ + (to enable SSL specify certfile)") + if password is not None: + raise Exception("SSL is disabled, password must not be specified \ + (to enable SSL specify certfile)") + + self.host = host + self.port = port + + def __enter__(self): + """Connects to host and port specified in the constructor.""" + self.sock.connect((self.host, self.port)) + return self + + def __exit__(self, t, v, traceback): + """Disconnects the socket.""" + self.sock.close() + + def write_byte(self, v): + """Writes the specified byte.""" + self.__write(v, "b") + + def write_short(self, v): + """Writes the specified short (2 bytes, little-endian).""" + self.__write(v, "h") + + def 
write_int(self, v): + """Writes the specified short (4 bytes, little-endian).""" + self.__write(v, "i") + + def write_long(self, v): + """Writes the specified int (8 bytes, little-endian).""" + self.__write(v, "q") + + def write_string(self, v): + """Writes the specified string.""" + self.sock.sendall(v.encode("UTF-8")) + + def read_data(self, length): + """Reads the specified number of bytes and returns them as a buffer.""" + data_buffer = None + rem = length + while rem > 0: + buf = self.sock.recv(rem) + rem = rem - len(buf) + if data_buffer is None: + data_buffer = buf + else: + data_buffer += buf + return data_buffer + + def __write(self, value, data_type): + """Packs and writes data using the specified type (little-endian).""" + data_buffer = struct.pack("<" + data_type, value) + self.sock.sendall(data_buffer) + +class BinaryType(): + """BinaryType class that encapsulated type id, type name and fields.""" + + def __init__(self, type_id, type_name, fields): + """Constructs a new instance of BinaryType.""" + self.type_id = type_id + self.type_name = type_name + self.fields = fields + +class BinaryField(): + """BinaryField class that encapsulated field name, type id and field id.""" + + def __init__(self, field_name, type_id, field_id): + """Constructs a new instance of BinaryField.""" + self.field_name = field_name + self.type_id = type_id + self.field_id = field_id + +# Binary types defined in Apache Ignite Thin client and supported by +# TensorFlow on Apache Ignite, see +# https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. 
types = {
    1: (dtypes.uint8, False),
    2: (dtypes.int16, False),
    3: (dtypes.int32, False),
    4: (dtypes.int64, False),
    5: (dtypes.float32, False),
    6: (dtypes.float64, False),
    7: (dtypes.uint16, False),
    8: (dtypes.bool, False),
    9: (dtypes.string, False),
    12: (dtypes.uint8, True),
    13: (dtypes.int16, True),
    14: (dtypes.int32, True),
    15: (dtypes.int64, True),
    16: (dtypes.float32, True),
    17: (dtypes.float64, True),
    18: (dtypes.uint16, True),
    19: (dtypes.bool, True),
    20: (dtypes.string, True)
}

class TypeTreeNode():
  """TypeTreeNode class exposes methods to format object tree structure
  data.
  """
  def __init__(self, name, type_id, fields=None, permutation=None):
    """Constructs a new instance of TypeTreeNode.

    Args:
      name: Name of the object tree node.
      type_id: Type id of the object tree node.
      fields: List of fields (children of the object tree node).
      permutation: Permutation that should be applied to order object children.
    """
    self.name = name
    self.type_id = type_id
    self.fields = fields
    self.permutation = permutation

  def to_output_classes(self):
    """Formats the tree object the way required in 'output_classes' property of
    dataset.
    """
    if self.fields is None:
      return ops.Tensor
    output_classes = {}
    for field in self.fields:
      output_classes[field.name] = field.to_output_classes()
    return output_classes

  def to_output_shapes(self):
    """Formats the tree object the way required in 'output_shapes' property of
    dataset.

    Raises:
      Exception: If a leaf node has a type id missing from `types`.
    """
    if self.fields is None:
      # `.get` (not indexing) so that an unknown id reaches the explicit
      # "Unsupported type" error below instead of a bare KeyError.
      object_type = types.get(self.type_id)
      if object_type is not None:
        is_array = object_type[1]
        if is_array:
          return tensor_shape.TensorShape([None])
        return tensor_shape.TensorShape([])
      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
    output_shapes = {}
    for field in self.fields:
      output_shapes[field.name] = field.to_output_shapes()
    return output_shapes

  def to_output_types(self):
    """Formats the tree object the way required in 'output_types' property of
    dataset.

    Raises:
      Exception: If a leaf node has a type id missing from `types`.
    """
    if self.fields is None:
      object_type = types.get(self.type_id)
      if object_type is not None:
        return object_type[0]
      raise Exception("Unsupported type [type_id=%d]" % self.type_id)
    output_types = {}
    for field in self.fields:
      output_types[field.name] = field.to_output_types()
    return output_types

  def to_flat(self):
    """Returns a pre-order list of the type ids of all nodes in the tree."""
    return self.to_flat_rec([])

  def to_permutation(self):
    """Returns a permutation that should be applied to order object leafs."""
    correct_order_dict = {}
    self.traversal_rec(correct_order_dict, 0)
    object_order = []
    self.traversal_permutation_rec(object_order)
    return [correct_order_dict[o] for o in object_order]

  def to_flat_rec(self, flat):
    """Appends this node's type id and, recursively, its children's to `flat`
    and returns `flat`.
    """
    flat.append(self.type_id)
    if self.fields is not None:
      for field in self.fields:
        field.to_flat_rec(flat)
    return flat

  def traversal_permutation_rec(self, permutation):
    """Collects leaf nodes, visiting children in `self.permutation` order."""
    if self.fields is None:
      permutation.append(self)
    else:
      for idx in self.permutation:
        field = self.fields[idx]
        field.traversal_permutation_rec(permutation)

  def traversal_rec(self, d, i):
    """Numbers leaf nodes in pre-order traversal.

    Args:
      d: Dictionary filled with a `leaf node -> index` entry per leaf.
      i: Index to be assigned to the next leaf.

    Returns:
      The index following the last one assigned.
    """
    if self.fields is None:
      d[self] = i
      i += 1
    else:
      for field in self.fields:
        i = field.traversal_rec(d, i)
    return i

class IgniteClient(TcpClient):
  """IgniteClient class exposes methods to work with Apache Ignite using Thin
  client. This client works with assumption that all object in the cache
  have the same structure (homogeneous objects) and the cache contains at
  least one object.
  """

  # Payload size in bytes of the fixed-size scalar types, keyed by type id.
  _SCALAR_SIZES = {
      1: 1,    # Byte scalar.
      2: 2,    # Short scalar.
      3: 4,    # Integer scalar.
      4: 8,    # Long scalar.
      5: 4,    # Float scalar.
      6: 8,    # Double scalar.
      7: 2,    # Char scalar.
      8: 1,    # Bool scalar.
      10: 16,  # UUID scalar.
      11: 8,   # Date scalar.
  }

  # Size in bytes of one element of the length-prefixed array types, keyed by
  # type id.
  _ARRAY_ELEMENT_SIZES = {
      12: 1,   # Byte array.
      13: 2,   # Short array.
      14: 4,   # Integer array.
      15: 8,   # Long array.
      16: 4,   # Float array.
      17: 8,   # Double array.
      18: 2,   # Char array.
      19: 1,   # Bool array.
      21: 16,  # UUID array. TODO: support NULL values.
      22: 8,   # Date array.
  }

  def __init__(self, host, port, username=None, password=None, certfile=None,
               keyfile=None, cert_password=None):
    """Constructs a new instance of IgniteClient.

    Args:
      host: Apache Ignite Thin client host to be connected.
      port: Apache Ignite Thin client port to be connected.
      username: Apache Ignite Thin Client authentication username.
      password: Apache Ignite Thin Client authentication password.
      certfile: File in PEM format containing the certificate as well as
        any number of CA certificates needed to establish the certificate's
        authenticity.
      keyfile: File containing the private key (otherwise the private key
        will be taken from certfile as well).
      cert_password: Password to be used if the private key is encrypted and a
        password is necessary.
    """
    TcpClient.__init__(self, host, port, certfile, keyfile, cert_password)
    self.username = username
    self.password = password

  def handshake(self):
    """Makes a handshake required to be made after connect before any other
    calls.

    Raises:
      Exception: If the server does not acknowledge the handshake.
    """
    # `write_string` sends the UTF-8 encoding, so every length put on the
    # wire has to be a byte count, not a character count.
    username_len = None
    if self.username is not None:
      username_len = len(self.username.encode("UTF-8"))
    password_len = None
    if self.password is not None:
      password_len = len(self.password.encode("UTF-8"))

    # Operation (1) + version (3 * 2) + client code (1).
    msg_len = 8
    # NULL takes 1 byte; a string takes type (1) + length (4) + payload.
    msg_len += 1 if username_len is None else 5 + username_len
    msg_len += 1 if password_len is None else 5 + password_len

    self.write_int(msg_len) # Message length
    self.write_byte(1) # Handshake operation
    self.write_short(1) # Version (1.1.0)
    self.write_short(1)
    self.write_short(0)
    self.write_byte(2) # Thin client

    if username_len is None: # Username
      self.write_byte(101)
    else:
      self.write_byte(9)
      self.write_int(username_len)
      self.write_string(self.username)

    if password_len is None: # Password
      self.write_byte(101)
    else:
      self.write_byte(9)
      self.write_int(password_len)
      self.write_string(self.password)

    self.read_int() # Result length
    res = self.read_byte()

    if res != 1:
      serv_ver_major = self.read_short()
      serv_ver_minor = self.read_short()
      serv_ver_patch = self.read_short()
      err_msg = self.__parse_string()
      if err_msg is None:
        raise Exception("Handshake Error [result=%d, version=%d.%d.%d]"
                        % (res, serv_ver_major, serv_ver_minor, serv_ver_patch))
      raise Exception("Handshake Error [result=%d, version=%d.%d.%d, "
                      "message='%s']" % (
                          res,
                          serv_ver_major,
                          serv_ver_minor,
                          serv_ver_patch,
                          err_msg
                      ))

  def get_cache_type(self, cache_name):
    """Collects type information about objects stored in the specified
    cache by running a one-row scan query and inspecting the returned
    key/value pair.

    Args:
      cache_name: Name of the cache to be inspected.

    Returns:
      Root `TypeTreeNode` with two children named "key" and "val".

    Raises:
      Exception: If the query fails or returns no rows.
    """
    cache_name_hash = self.__java_hash_code(cache_name)
    self.write_int(25) # Message length
    self.write_short(2000) # Operation code
    self.write_long(0) # Request ID
    self.write_int(cache_name_hash) # Cache name
    self.write_byte(0) # Flags
    self.write_byte(101) # Filter (NULL)
    self.write_int(1) # Cursor page size
    self.write_int(-1) # Partition to query
    self.write_byte(0) # Local flag

    result_length = self.read_int()
    self.read_long() # Request id
    status = self.read_int()

    if status != 0:
      err_msg = self.__parse_string()
      if err_msg is None:
        raise Exception("Scan Query Error [status=%s]" % status)
      raise Exception("Scan Query Error [status=%s, message='%s']"
                      % (status, err_msg))

    self.read_long() # Cursor id
    row_count = self.read_int()

    if row_count == 0:
      raise Exception("Scan Query returned empty result, so it's "
                      "impossible to derive the cache type")

    # 25 = request id (8) + status (4) + cursor id (8) + row count (4) +
    # next page flag (1); the rest of the response is the row payload.
    payload = DataBuffer(self.read_data(result_length - 25))

    self.read_byte() # Next page

    res = TypeTreeNode("root", 0, [
        self.__collect_types("key", payload),
        self.__collect_types("val", payload)
    ], [0, 1])

    return res

  def __java_hash_code(self, s):
    """Computes hash code of the specified string the way Java does
    (h = 31 * h + c), wrapped to a signed 32-bit integer.
    """
    h = 0
    for c in s:
      h = (31 * h + ord(c)) & 0xFFFFFFFF
    return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000

  def __collect_types(self, field_name, data):
    """Extracts type information from the specified object.

    Consumes exactly one serialized object from `data` and describes it.

    Args:
      field_name: Name to be given to the resulting tree node.
      data: `Readable` positioned at the type id byte of the object.

    Returns:
      `TypeTreeNode` describing the object (with children for complex
      objects).

    Raises:
      Exception: If an unknown type id is met.
    """
    type_id = data.read_byte()

    # Fixed-size scalars: skip the payload.
    scalar_size = self._SCALAR_SIZES.get(type_id)
    if scalar_size is not None:
      data.skip(scalar_size)
      return TypeTreeNode(field_name, type_id)

    # String scalar.
    if type_id == 9:
      length = data.read_int()
      data.skip(length)
      return TypeTreeNode(field_name, type_id)

    # Arrays of fixed-size elements: element count followed by the payload.
    element_size = self._ARRAY_ELEMENT_SIZES.get(type_id)
    if element_size is not None:
      length = data.read_int()
      data.skip(length * element_size)
      return TypeTreeNode(field_name, type_id)

    # String array: every element is either a string (9) or NULL (101).
    if type_id == 20:
      length = data.read_int()
      for _ in range(length):
        header = data.read_byte()
        if header == 9:
          str_length = data.read_int()
          data.skip(str_length)
        elif header == 101:
          pass
        else:
          raise Exception("Unknown binary type when expected string "
                          "[type_id=%d]" % header)
      return TypeTreeNode(field_name, type_id)

    # Wrapped Binary Object.
    if type_id == 27:
      length = data.read_int()
      inner_data = data.read_data(length)
      data.read_int() # Offset
      return self.__collect_types(field_name, DataBuffer(inner_data))

    # Complex Object.
    if type_id == 103:
      data.read_byte() # Object version
      data.read_short() # Object flags
      obj_type_id = data.read_int()
      data.read_int() # Object hash code
      obj_length = data.read_int()
      data.read_int() # Object schema id
      obj_schema_offset = data.read_int()

      obj_type = self.__get_type(obj_type_id)
      children = []

      for obj_field in obj_type.fields:
        child = self.__collect_types(obj_field.field_name, data)
        children.append(child)

      # Children are exposed sorted by name; `permutation` records, for each
      # child in serialized order, its position in the sorted list.
      children_sorted = sorted(children, key=lambda child: child.name)
      permutation = [children_sorted.index(child) for child in children]
      children = children_sorted

      # Skip what follows the fields (everything past the schema offset).
      data.skip(obj_length - obj_schema_offset)

      return TypeTreeNode(field_name, type_id, children, permutation)

    raise Exception("Unknown binary type [type_id=%d]" % type_id)

  def __get_type(self, type_id):
    """Queries Apache Ignite information about type by type id.

    Args:
      type_id: Id of the binary type to be fetched.

    Returns:
      `BinaryType` with the fields reported by the server.

    Raises:
      Exception: If the request fails, the type is unknown to the server or
        the type is an enum.
    """
    self.write_int(14) # Message length
    self.write_short(3002) # Operation code
    self.write_long(0) # Request ID
    self.write_int(type_id) # Type ID

    self.read_int() # Result length
    self.read_long() # Request id
    status = self.read_int()

    if status != 0:
      err_msg = self.__parse_string()
      # Include the message only when the server actually sent one.
      if err_msg is None:
        raise Exception("Get Binary Type Error [status=%d]" % status)
      raise Exception("Get Binary Type Error [status=%d, message='%s']"
                      % (status, err_msg))

    binary_type_exists = self.read_byte()

    if binary_type_exists == 0:
      raise Exception("Binary type not found [type_id=%d] " % type_id)

    binary_type_id = self.read_int()
    binary_type_name = self.__parse_string()
    self.__parse_string() # Affinity field name

    fields = []
    for _ in range(self.read_int()):
      field_name = self.__parse_string()
      field_type_id = self.read_int()
      field_id = self.read_int()

      field = BinaryField(field_name, field_type_id, field_id)
      fields.append(field)

    is_enum = self.read_byte()
    if is_enum == 1:
      raise Exception("Enum fields are not supported yet")

    schema_cnt = self.read_int()
    for _ in range(schema_cnt):
      self.read_int() # Schema id
      field_cnt = self.read_int()
      self.skip(field_cnt * 4)

    return BinaryType(binary_type_id, binary_type_name, fields)

  def __parse_string(self):
    """Parses string.

    Returns:
      The decoded string, or None if the server sent NULL (type id 101).

    Raises:
      Exception: If the value is neither a string nor NULL.
    """
    header = self.read_byte()
    if header == 9:
      length = self.read_int()
      # Do not rely on `read_data(0)` returning a bytes object: an empty
      # string carries no payload at all.
      data = self.read_data(length) if length > 0 else b""
      return data.decode("utf-8")
    if header == 101:
      return None
    raise Exception("Unknown binary type when expected string [type_id=%d]"
                    % header)

class IgniteDataset(Dataset):
  """Apache Ignite is a memory-centric distributed database, caching, and
  processing platform for transactional, analytical, and streaming workloads,
  delivering in-memory speeds at petabyte scale. This contrib package
  contains an integration between Apache Ignite and TensorFlow. The
  integration is based on tf.data from TensorFlow side and Binary Client
  Protocol from Apache Ignite side. It allows to use Apache Ignite as a
  datasource for neural network training, inference and all other
  computations supported by TensorFlow. Ignite Dataset is based on Apache
  Ignite Binary Client Protocol.
  """

  def __init__(self, cache_name, host="localhost", port=10800, local=False,
               part=-1, page_size=100, username=None, password=None,
               certfile=None, keyfile=None, cert_password=None):
    """Create a IgniteDataset.

    The constructor connects to Apache Ignite, performs the handshake and
    queries the cache once in order to derive the structure of its objects.

    Args:
      cache_name: Cache name to be used as datasource.
      host: Apache Ignite Thin Client host to be connected.
      port: Apache Ignite Thin Client port to be connected.
      local: Local flag that defines to query only local data.
      part: Partition data should be fetched from.
      page_size: Apache Ignite Thin Client page size.
      username: Apache Ignite Thin Client authentication username.
      password: Apache Ignite Thin Client authentication password.
      certfile: File in PEM format containing the certificate as well as
        any number of CA certificates needed to establish the certificate's
        authenticity.
      keyfile: File containing the private key (otherwise the private key
        will be taken from certfile as well).
      cert_password: Password to be used if the private key is encrypted and a
        password is necessary.
    """
    super(IgniteDataset, self).__init__()

    with IgniteClient(host, port, username, password, certfile, keyfile,
                      cert_password) as client:
      client.handshake()
      self.cache_type = client.get_cache_type(cache_name)

    # The op takes string inputs, so unset optional values are passed as "".
    self.cache_name = ops.convert_to_tensor(
        cache_name, dtype=dtypes.string, name="cache_name")
    self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host")
    self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port")
    self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local")
    self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part")
    self.page_size = ops.convert_to_tensor(
        page_size, dtype=dtypes.int32, name="page_size")
    self.username = ops.convert_to_tensor(
        "" if username is None else username,
        dtype=dtypes.string, name="username")
    self.password = ops.convert_to_tensor(
        "" if password is None else password,
        dtype=dtypes.string, name="password")
    self.certfile = ops.convert_to_tensor(
        "" if certfile is None else certfile,
        dtype=dtypes.string, name="certfile")
    self.keyfile = ops.convert_to_tensor(
        "" if keyfile is None else keyfile,
        dtype=dtypes.string, name="keyfile")
    self.cert_password = ops.convert_to_tensor(
        "" if cert_password is None else cert_password,
        dtype=dtypes.string, name="cert_password")
    self.schema = ops.convert_to_tensor(
        self.cache_type.to_flat(), dtype=dtypes.int32, name="schema")
    self.permutation = ops.convert_to_tensor(
        self.cache_type.to_permutation(),
        dtype=dtypes.int32, name="permutation")

  def _as_variant_tensor(self):
    return gen_dataset_ops.ignite_dataset(
        self.cache_name, self.host, self.port, self.local, self.part,
        self.page_size, self.username, self.password, self.certfile,
        self.keyfile, self.cert_password, self.schema, self.permutation)

  @property
  def output_classes(self):
    return self.cache_type.to_output_classes()

  @property
  def output_shapes(self):
    return self.cache_type.to_output_shapes()

@property + def output_types(self): + return self.cache_type.to_output_types() diff --git a/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py new file mode 100644 index 0000000000..8115bda85b --- /dev/null +++ b/tensorflow/contrib/ignite/python/ops/ignite_op_loader.py @@ -0,0 +1,25 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Python helper for loading Ignite ops and kernels.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.util import loader +from tensorflow.python.platform import resource_loader + +_dataset_ops = loader.load_op_library( + resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh new file mode 100755 index 0000000000..f4607ce8ad --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-plain.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-plain.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh \ +-u "jdbc:ignite:thin://127.0.0.1/" \ +--run=/data/sql/init.sql + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh new file mode 100755 index 0000000000..dde1162816 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl-auth.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl-auth.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\ +sslMode=require&\ +sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\ +sslClientCertificateKeyStorePassword=123456&\ +sslTrustAll=true&\ +username=ignite&\ +password=ignite" --run=/data/sql/init.sql + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh new file mode 100755 index 0000000000..58b40b2738 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/bin/start-ssl.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +nohup apache-ignite-fabric/bin/ignite.sh /data/config/ignite-config-ssl.xml & +sleep 5 # Wait Apache Ignite to be started + +./apache-ignite-fabric/bin/sqlline.sh -u "jdbc:ignite:thin://127.0.0.1/?\ +sslMode=require&\ +sslClientCertificateKeyStoreUrl=/data/keystore/client.jks&\ +sslClientCertificateKeyStorePassword=123456&\ +sslTrustAll=true" --run=/data/sql/init.sql --verbose=true + +tail -f nohup.out diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml new file mode 100644 index 0000000000..d900174a8a --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-plain.xml @@ -0,0 +1,39 @@ + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml new file mode 100644 index 0000000000..8e001b28ab --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl-auth.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml new file mode 100644 index 0000000000..42d480c114 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/config/ignite-config-ssl.xml @@ -0,0 +1,59 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 127.0.0.1 + + + + + + + + + diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py new file mode 100644 index 0000000000..933e62b804 --- /dev/null +++ b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py @@ -0,0 +1,77 @@ +# Copyright 2018 The TensorFlow Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not +# use this file except in compliance with the License. You may obtain a copy of +# the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations under +# the License. +# ============================================================================== +"""Tests for IgniteDataset.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import tensorflow as tf +from tensorflow.contrib.ignite import IgniteDataset +from tensorflow.python.framework import errors +from tensorflow.python.platform import test + +class IgniteDatasetTest(test.TestCase): + """The Apache Ignite servers have to setup before the test and tear down + after the test manually. The docker engine has to be installed. 
+ + To setup Apache Ignite servers: + $ bash start_ignite.sh + + To tear down Apache Ignite servers: + $ bash stop_ignite.sh + """ + + def test_ignite_dataset_with_plain_client(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300) + self.__check_dataset(ds) + + def test_ignite_dataset_with_ssl_client(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\ + certfile=os.path.dirname(os.path.realpath(__file__)) +\ + "/keystore/client.pem", cert_password="123456") + self.__check_dataset(ds) + + def test_ignite_dataset_with_ssl_client_and_auth(self): + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\ + certfile=os.path.dirname(os.path.realpath(__file__)) +\ + "/keystore/client.pem", cert_password="123456",\ + username="ignite", password="ignite") + self.__check_dataset(ds) + + def __check_dataset(self, dataset): + """Checks that dataset provids correct data. + """ + self.assertEquals(tf.int64, dataset.output_types['key']) + self.assertEquals(tf.string, dataset.output_types['val']['NAME']) + self.assertEquals(tf.int64, dataset.output_types['val']['VAL']) + + it = dataset.make_one_shot_iterator() + ne = it.get_next() + + with tf.Session() as sess: + rows = [sess.run(ne), sess.run(ne), sess.run(ne)] + with self.assertRaises(errors.OutOfRangeError): + sess.run(ne) + + self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ + rows[0]) + self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ + rows[1]) + self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ + rows[2]) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/ignite/python/tests/keystore/client.jks b/tensorflow/contrib/ignite/python/tests/keystore/client.jks new file mode 100644 index 0000000000000000000000000000000000000000..1875c71b605253603eb63e446da8f07cd84f64a0 GIT binary patch literal 3232 zcmezO_TO6u1_mZ5W@KPX&dE&8D`8+@G{5kwMTdcbX^%k@(+&eZHZE;8MixdbCP79< 
zRtAYq8p|C_4X`71ir_3D+J-^_h0{`v;Jq^I7?=cj)By2Gv~;-!V; zrk^*tG^Q9QMw#R_-+6sBwsZISYYa0FeehS&sNA)_h$$$xItuMbc#jm^U z)EsGM7v48t=yRQpg+|uW%p2uNTMy;l<&ooQ-L|Fm+-LC}Ql7ti^n{IXo#DN-aQ4pg z#|pK-#~2r{+<*SvmVdLv(*kBDMkXdk6$4&2POUbNw(q=*jI68-2IYp_2Apinp)72|OhKWB zJO*4K4u>#va6l4_#}45M8t{YUxP&>}i!xK(6EpK*3b=)tT~f$B_|rn8OXqG z;$#%#Pfm15FUm|wPt45IOU}1uEr#pj7x=Nuzv6iE!us=ylaaxU{}zcI(6gN!b!PABw|X+mmI_84U%E|> z{mQW=_S;Qr`2HyF@K#y!w@;mknUR5UakW7uIJRVkS(pqM40PBy6WTl&+kQAP^0LT_ z$@&+h<^>1)D7dHQr4}WYq^2l1rxulDre!84mZTcUvN4Cs^0A1qh&cZ>+QiDUB3!Cv z*QCz@@``+0T3;B*gQS&NBn-qFM7I9ms7T4H+0xmqwdiX68%NFUt%1m4!wL$UMuzTN zKavj~N?{J}lzRGR$J_m%x8=zk+ue9hbM_;R=>eq;wo5iLG6bg=|6T5UzizMm4#|@0 zC3nU6D?6K?{L1|Lkcsc;orbHcP1{x#s@#Ya5UGy8w0-x%FE!^&Uhc18xbL`FJu7bh zEc2IVEfii)u)od6#&v3~%l?{W-yQ1i>pxA|_=W+N&Y67;nwUKenwU00(m5x?%60vB zcixWkLv!^YNbd$_rly8Qu=H*MS!M-c39-n=bx7Ci%v0sW&eC`$Zt&A6J zx2%mi{^N~-V|3b??)9@L{4(U4*Vx*;m6w(6+2y~S{15JDb~|jS)~V&%s_R*>%7H6j zac`Q_$Ni90-+0HM@g`F8#hN-BS3=Tn<3fYRdDv6%!s;cJ0~XFrNNJJ>5CIRj);LinkxSv+h!V3q#BbzJF1B6oSQ$a90?dH&#Df;JoYLpG4o4 z_vH`lZk%qiR`UI=vL~V2U#z=$!t$%x!4s7Yn#sG}#XXJ0Jr*}aeBxPNc>0L`(;ur` z7niRTx7FTo)X_L*nZ@%!-U~AR;=(>@))7-*?Qk|v-SSgPPEf}ZT!%0vCqmjp&4Hu& pj0zRrXg*^OWubk;VkqUaBW_g*^E|kHw{Q7 +-----BEGIN RSA PRIVATE KEY----- +Proc-Type: 4,ENCRYPTED +DEK-Info: DES-EDE3-CBC,CE61EDD98349D0C7 + +Kzl16sj8R7YUXPCEZCqCrY4LSAjiKCRFNOagEehvN9Jpswcz4JbatoFmvVvOCgBF +7kkeCaALhfM5a+46uynZ1sOOFUOn8fUFgguN3lLInWfm6vTuXDPslg0/tRNI0YqW +ujfxyzrm1/k4RX0oLzRE1jZr69VZsBmZndkz9nkz3anWKLE7X/VIFV6U/N6YNPch +BG1Fxpt/HtM9p3B5wNDSjCVaeNP1ROKe3APLRY6k+SppTuntHV5q9Ni82r1l3ahU +zf2QvocSy9MLh+bGusJGHyJJAGuwPHm6ytPwbXGHn5xe4HPIno28j9kN7EL1ZoUs +q0PhipAkFrGIM4zg6nAwVdzY5iGySDQ3fWpz2MkrKMDRftBwA3o/M321NBUW9/2X +l+XmjXcJd0dEOslGxveb6UXLL2YvYszjQXRR4dCV/40bMJL3umRhVSay0NteoXfY +82rQchm2NHKOiDfB4RpD8JJtVQeDSMXc9TH5y2Ua7FZND60JXtFpdnfCVfVZuBJm +yBafyIsXR7EQzLG4z28Dvp4fs42A3JkF+e9Aq6Y6MmYA1wsvIKKT9HKEifqKmbgG +4E9WOZn5IWi4ZJ44VAwN/uBGrLm//3OjByeB9y8vszNbyY8dQ8x5XqnF/IzIvgqc 
+uKA8xuLAkTFmgRGQ/lmMDR+iMhet5dCtg9Orb9tYVL55JAb/OfsCX0LTJ3Y2RmIx +CaFpkUP7KKYD+69ajnFCxvfGnGxyBkf+JeuDYIZVFklVT9SUtL9RJh26jUdvHt2A +LQerBl8UCkVbPxsxYjdawvxuBNTD6tSRykM8zwtWcvIubp+gxE7png== +-----END RSA PRIVATE KEY----- +Bag Attributes + friendlyName: 1.2.840.113549.1.9.1=#1613636c69656e7440677269646761696e2e636f6d,CN=client,OU=Dev,O=GridGain,ST=SPb,C=RU + localKeyID: 54 69 6D 65 20 31 33 33 39 32 33 39 38 35 39 34 34 36 +subject=/C=RU/ST=SPb/O=GridGain/OU=Dev/CN=client/emailAddress=client@gridgain.com +issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +-----BEGIN CERTIFICATE----- +MIIC2TCCAkKgAwIBAgIBJDANBgkqhkiG9w0BAQUFADB3MQswCQYDVQQGEwJSVTEM +MAoGA1UECBMDU1BiMQwwCgYDVQQHEwNTUGIxETAPBgNVBAoTCEdyaWRHYWluMQww +CgYDVQQLEwNEZXYxCzAJBgNVBAMTAmNhMR4wHAYJKoZIhvcNAQkBFg9jYUBncmlk +Z2Fpbi5jb20wHhcNMTIwNjA5MTEwNDE3WhcNMzIwNjA5MTEwNDE3WjBxMQswCQYD +VQQGEwJSVTEMMAoGA1UECBMDU1BiMREwDwYDVQQKEwhHcmlkR2FpbjEMMAoGA1UE +CxMDRGV2MQ8wDQYDVQQDEwZjbGllbnQxIjAgBgkqhkiG9w0BCQEWE2NsaWVudEBn +cmlkZ2Fpbi5jb20wgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBANIHHcYiA+CP +EBPKNZJ6mtvN4d9Yj43B5/hzs/TK3e4XImLsMhXaElYtrXQX/SDK7Zv5zdj6AkKH +QkJ9BT8Jw7wvOQx/v4Qxrl+gTgcf6gjk6DvzqMlZUwH+ohbALj2TWsy9y+0uHKal +EVrHpbYeB9TGpD+3NHwO/CG4SySk/Y4nAgMBAAGjezB5MAkGA1UdEwQCMAAwLAYJ +YIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRlMB0GA1Ud +DgQWBBRD/TKyBQyoVxqEupLzUB8hDrSF6DAfBgNVHSMEGDAWgBS1+Ah4ZG58tImL +KqLVX+xBKbeFUTANBgkqhkiG9w0BAQUFAAOBgQCL2vhjwcJkA1OJGuXsuO2/87Zu +HMa7gc4pm+Iol1B1gD2ksQEAU2dz/adD3369H7gZdHuk3RYPeYmD5Ppp9eECDsXc +gNWrNYaqcSTYWRAUe1/St7vB9HzPdOm/eADfQaMnal6fmjfpzTgg65A/2w4GCsqt +RL98pvdAft8v5WSx7A== +-----END CERTIFICATE----- +Bag Attributes + friendlyName: 1.2.840.113549.1.9.1=#160f636140677269646761696e2e636f6d,CN=ca,OU=Dev,O=GridGain,L=SPb,ST=SPb,C=RU +subject=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +issuer=/C=RU/ST=SPb/L=SPb/O=GridGain/OU=Dev/CN=ca/emailAddress=ca@gridgain.com +-----BEGIN CERTIFICATE----- 
+MIIDSTCCArKgAwIBAgIJAKmuj925215OMA0GCSqGSIb3DQEBBQUAMHcxCzAJBgNV +BAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3Jp +ZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEW +D2NhQGdyaWRnYWluLmNvbTAeFw0xMjA2MDkwNjU1MTJaFw0zMjA2MDQwNjU1MTJa +MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQIEwNTUGIxDDAKBgNVBAcTA1NQYjERMA8G +A1UEChMIR3JpZEdhaW4xDDAKBgNVBAsTA0RldjELMAkGA1UEAxMCY2ExHjAcBgkq +hkiG9w0BCQEWD2NhQGdyaWRnYWluLmNvbTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw +gYkCgYEAtd16DCObyM63NKF/cvRcE+8cr1dc3c7mSnTEQ61WfqPJ2QqsQAB6e+5+ +q9Np1SaJyqFTTag6483ibrU+DkGPGgEXndRHtQHQPbStWsf47DBBW2bMi6+bkPox +Cp6BhYO1DQUG5tP9CQ/g32mLQLB7LH0KtS1JcKpAClCjjWZC8b8CAwEAAaOB3DCB +2TAdBgNVHQ4EFgQUtfgIeGRufLSJiyqi1V/sQSm3hVEwgakGA1UdIwSBoTCBnoAU +tfgIeGRufLSJiyqi1V/sQSm3hVGhe6R5MHcxCzAJBgNVBAYTAlJVMQwwCgYDVQQI +EwNTUGIxDDAKBgNVBAcTA1NQYjERMA8GA1UEChMIR3JpZEdhaW4xDDAKBgNVBAsT +A0RldjELMAkGA1UEAxMCY2ExHjAcBgkqhkiG9w0BCQEWD2NhQGdyaWRnYWluLmNv +bYIJAKmuj925215OMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADgYEAhrzd +qusVLHO3wtyu0o+EAFyoDv5avCBTFsQLeDDPMyfDcEO6wfxhTanfH8C7gZc0rRnv +2nbkVbfortHIOfU2wch5gClju0cXSTIXSKOAWPIMp3HLxC/l+KpFo3epFz0rsMVB +M1ymOOdRDdAcTxcTTGY7WJXquEM3ZbT5Gh4RLDk= +-----END CERTIFICATE----- diff --git a/tensorflow/contrib/ignite/python/tests/keystore/server.jks b/tensorflow/contrib/ignite/python/tests/keystore/server.jks new file mode 100644 index 0000000000000000000000000000000000000000..006ececc31118aa18ddb6e4ec27d002e5e11646c GIT binary patch literal 3230 zcmezO_TO6u1_mZLW=c+EU|=-A@M=~y18anysevT}1GBF|6SJp56Vs*z%uI|-Oq>iW z*Y)4sc{|R}fR~L^tIebBJ1-+6D=ULRxgoa!CmVAp3!5-gP^ck~0T+nFAqM zGxPM4^K%X4#CZ*k49pBH4a`hU4UMA2d5u9_6DW7!9M(7=IgA)t8JHV;84MabnHn1z zw%)DcQJ#I`+;)?N^+jJ|gx|}o50ANf?wMD~5$CmGb&F5liCa029D8bXS&zVp76_%YhGh(^HyF~ zwr7|Ba`Hd8pV{rOp<1VwYpbqj!72x?fW^IOP9OI(F*7nSE^fSI(0J28mW?@7mXAe@ zMP%y_j*67Lnk}8(T8pm6zj4&u-Wq7oxDq6<%+k2fpmAOURy7N&msAc|I5#1sNgj|> zg;`h)m>C)WBZodKDD)c{+VAF{>n`=TFvP6j`xmuGAz17PcZI=uWA(!Y z&bto&N%UQLU;e=E#_1+&CEwpFdlI_+#kz|pEWer^JW<)8nY`Ow+|x+hV{t>oC!Xbn 
zr;q4A{jth*arsJdTkQ=;9gSm_Sv(Kqy&&T+F6@(L9WnLQ4rlY!EkC8?1a&MK7#J8C z*osq&%2JCUd2Mzj0|V18gC?eJ27GK>+H8z0j9N^BjEt-dEKN*{JR;+brdQtCd04*1 z$YblrpVOXBDssj0qL-e! z_3tE^3-va%w|`$L;}Nj<>_c{`U$cx$gv{s4>?kxoKIdBIo;jx!9jbh5SLQifJJrzs zKg>i-)9GBrhpt-!C&V3HuCAQCd+ibRlH+zhVzLkahd3JDTd_wa_k`B9sMrWek?13{ z*dBacz4IgAeb)@M~3AX3(WI_H1kwhYD0)`sg3qynpK@nTnN(CM+8|HvZ!6J^p;z z70HDklrAs3a-g*4a!r-uh2Qu8U1U*M%*r<}VcnA4fROmk&r_YszlR%axuOEMR(yjB1N0!)447OT5hin z`e@h^I(x}V?cc3Y48HPiKl(P^C`fr6_w|pjbFf;;&fi9lD<_^(-g^I5=a%BLPyRf3 zdU0>w&2)9mhk?Jp?O3_5|F@rk_~K+{aP0xBEtu{aG%?*YXkv1L)E11YXnAnJ3JF9l zVQ6SzVg#=xETLS3LRh7Pt@$#x zU$5)brMsWC-|mr^9a-qdEyI5}#;Jbot6!hlq^dr@w9ngL^i9Q-xAje)zPeoZQTZ3^ zI{SX>N-Unl-@TrBs?fRG_WGF4H4fa*%CAD|hiZdLaGfqI%)(^AV4%asnb79J*!IJT zk(WhYOxC|3H7_{WN5MTcFSRJKBsE3BIkl)HGc7YYu_P6#T4`B)<&VueNfCvrDJrI6 z;+q!D*D*4X2Nl)IED{D{*y|Lul*8QE$guv)rbS0jOy~Khd5i0YgcQr&WH!FHLBEbS zt>f>NJd)IQh2=oPhQ#@NHyz)uGMdNwXx?Y07a!}Fce*DBynQM9JI6`FqT^^1yQE9p z&0|vfQ<(D9Zo~)i9e#GAtb#Qsng8dHFBKsVC+4kv*LYIX@L62#V&#`f_RBX&U0c1a z>(PbZn?_qjRA{c@OZB6zBIZyQ>NJaoQmbfAcG9xQoh!B}_ieqmB!8{QbKdug03J1V AGXMYp literal 0 HcmV?d00001 diff --git a/tensorflow/contrib/ignite/python/tests/keystore/trust.jks b/tensorflow/contrib/ignite/python/tests/keystore/trust.jks new file mode 100644 index 0000000000000000000000000000000000000000..a00f1251af72982ddcd42c0274fc7b16e35dbc4c GIT binary patch literal 2432 zcmezO_TO6u1_mYu1_ov@&6J$Tz`$sJ;nlo346G4)rUsS_49va;P0XGKO-!2>Ff%bS zF>x}iT-Seh=j}K@170>xtu~Lg@4SqRtgH+M<%Zk_oNUaYENsF|L7|2`23#NxhcI(+ zKoX3{4&ezJ@Pp*IggM-cGE>|WGxJ~yxP_TrQp;c_F$*&#CmPBb$iQvlWEA61PIO2w z%1lX5%*@kE&d)WF6X!KFGB7i+G%zzYH8hG6=QRd#O`zO?b6DeiYTK0M~`xo2J_N1WG&)h#}GlWUCwLsj*=y49C6 zuc~#PS{UrR!s_waM|oTA_#FGC7{%vaao@^#!FJ2qsN+B07&u0!o#|dbd%`b6u6d2E z&0Bd{*`8hg%gO)XerC7BhH9N!uC2PB1*;sm0v7kCIepyE#LURRxVZ6-LE}vWSvKZS zSw0pq7Llz#I4V-|YPNKCYc0AO|He^sduyOU<4TacGE3t^gT{FcSk)}7UQ#(=;oO9j zCV4IU}j|ej~x1}pwMq*Xxnpl)oW3m;_Zj-th?0T!Vt59?_bm&ggGBm3)7z>`CbM7waybu>5Lv@I+;UX7X-#aZe+0kHrlU 
zpLmuRo<5@g^v5dK#pNr-ZM8QXbu^AyX7N0b_kxVSxUf%}b;Q(HJDkl^xBQfn6V$N; z=QFnCoXpg`5=dT~Ujff+Om_{Mm~I+0F*!l<8lwuD4+boV5qZqe(7?pd9G1r{p`3Yusegj@mdB7|T4H-iv0|n$PFANDV)NGG0cU@waJ*LF`pkF}vl;>}-9-F+vnl=Q|(^p;SV?%ERZzY3?`&i;A!#xEwPb|i4%8u8Uvb$1eYh92y{F&m$t+tc7u(KqqxIcWy#+@bx7{2HmC%bh_b>gOa=@FI&7Q?Z61tmKb#nOS>(lJ{R>j_f`fe& z+*9*XixNvxQxu$2i%K%nGLsWaQjs#H^IxM)tUN2irCN4P`Wzsy$hW2Sg@HUMp)0dU z7>Hrdk!UH0xv`O<`__--gNIU>gFB_3zS;41|L1LaGRJl|p3|KDNMm|HX@l*Ojf@Pz z>BWDSJKwL{E5AdsqT1)rRfQ@yA_YXM<1cOBeeg@o z`I49WD;VxOE>_Qqn?K9^?x~m%==KUWoEfg)yv(f;8ne#OZ(yH^Yyw;UAp^O`|Tcy z*^z~Q+%o)!W1Q;OzWVi8s0iAC-TxuCwpAuEgR={N3xB zrwW~`ZLg2%T;ss~to$lf(|*h1D}QX(Ns1^`O;Iro6W_FGzK#(&X@C8fO^c46n9lQ0 z^A^_)2`QGl$!vUYgMJ-vTF2ijc_gXr3d@0n4T7ZHcaD>UMaR)3c1f4Go5!T|r!eKI-G~q3JN)cKSp{oOGXKvXUn)W#PRv{TuJNR% v;j_5f#mX;}?3Ztly0&^-*P{!+H%&8h)e~82@&Dp^^P Date: Fri, 24 Aug 2018 18:15:57 +0300 Subject: [PATCH 028/570] Remove duplicated header from README.md. --- tensorflow/contrib/ignite/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md index 9054344e94..f2596fc572 100644 --- a/tensorflow/contrib/ignite/README.md +++ b/tensorflow/contrib/ignite/README.md @@ -1,4 +1,3 @@ -### Ignite Dataset # Ignite Dataset - [Overview](#overview) @@ -164,4 +163,4 @@ After that you will be able to work with it following way: ## Limitations -Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. \ No newline at end of file +Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. 
Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. -- GitLab From 241c1740ee26b57b7a5fe8f72b9d34f4515af760 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Sun, 26 Aug 2018 16:03:04 +0000 Subject: [PATCH 029/570] Update after review: change 'ignite' namespace to 'tensorflow', rename variables to satisty code style, use pointers instead of references. --- tensorflow/contrib/ignite/BUILD | 1 - tensorflow/contrib/ignite/__init__.py | 4 +- .../kernels/ignite_binary_object_parser.cc | 322 +++++++--------- .../kernels/ignite_binary_object_parser.h | 9 +- .../contrib/ignite/kernels/ignite_client.cc | 55 --- .../contrib/ignite/kernels/ignite_client.h | 45 ++- .../contrib/ignite/kernels/ignite_dataset.cc | 105 +++-- .../contrib/ignite/kernels/ignite_dataset.h | 65 ++-- .../ignite/kernels/ignite_dataset_iterator.cc | 358 +++++++++--------- .../ignite/kernels/ignite_dataset_iterator.h | 80 ++-- .../ignite/kernels/ignite_dataset_ops.cc | 10 +- .../ignite/kernels/ignite_plain_client.h | 21 +- .../kernels/ignite_plain_client_unix.cc | 78 ++-- .../kernels/ignite_plain_client_windows.cc | 77 ++-- .../ignite/kernels/ignite_ssl_wrapper.cc | 107 +++--- .../ignite/kernels/ignite_ssl_wrapper.h | 30 +- 16 files changed, 619 insertions(+), 748 deletions(-) delete mode 100644 tensorflow/contrib/ignite/kernels/ignite_client.cc diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index 9f6c666893..b7d40a99f7 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -40,7 +40,6 @@ cc_library( srcs = [ "kernels/ignite_dataset_ops.cc", "kernels/ignite_client.h", - "kernels/ignite_client.cc", "kernels/ignite_plain_client.h", "kernels/ignite_ssl_wrapper.h", "kernels/ignite_ssl_wrapper.cc", diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py index 468920a557..b78829d0f4 100644 --- 
a/tensorflow/contrib/ignite/__init__.py +++ b/tensorflow/contrib/ignite/__init__.py @@ -30,9 +30,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops \ -import IgniteDataset - +from tensorflow.contrib.ignite.python.ops.ignite_dataset_ops import IgniteDataset from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc index bf0ef8766e..9bf4480d2d 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -15,290 +15,258 @@ limitations under the License. #include "ignite_binary_object_parser.h" -namespace ignite { +namespace tensorflow { -tensorflow::Status BinaryObjectParser::Parse( - uint8_t*& ptr, std::vector& out_tensors, - std::vector& types) { - uint8_t object_type_id = *ptr; - ptr += 1; +Status BinaryObjectParser::Parse(uint8_t** ptr, + std::vector* out_tensors, + std::vector* types) { + uint8_t object_type_id = **ptr; + *ptr += 1; switch (object_type_id) { case BYTE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT8, {}); - tensor.scalar()() = *((uint8_t*)ptr); - ptr += 1; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_UINT8, {}); + tensor.scalar()() = *((uint8_t*)*ptr); + *ptr += 1; + out_tensors->push_back(std::move(tensor)); break; } case SHORT: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT16, {}); - tensor.scalar()() = *((int16_t*)ptr); - ptr += 2; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT16, {}); + tensor.scalar()() = *((int16_t*)*ptr); + *ptr += 2; + out_tensors->push_back(std::move(tensor)); break; } case INT: { - 
tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT32, {}); - tensor.scalar()() = *((int32_t*)ptr); - ptr += 4; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT32, {}); + tensor.scalar()() = *((int32_t*)*ptr); + *ptr += 4; + out_tensors->push_back(std::move(tensor)); break; } case LONG: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, {}); - tensor.scalar()() = *((int64_t*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT64, {}); + tensor.scalar()() = *((int64_t*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case FLOAT: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_FLOAT, {}); - tensor.scalar()() = *((float*)ptr); - ptr += 4; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_FLOAT, {}); + tensor.scalar()() = *((float*)*ptr); + *ptr += 4; + out_tensors->push_back(std::move(tensor)); break; } case DOUBLE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_DOUBLE, {}); - tensor.scalar()() = *((double*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_DOUBLE, {}); + tensor.scalar()() = *((double*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case UCHAR: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT16, {}); - tensor.scalar()() = *((uint16_t*)ptr); - ptr += 2; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_UINT16, {}); + tensor.scalar()() = *((uint16_t*)*ptr); + *ptr += 2; + out_tensors->push_back(std::move(tensor)); break; } case BOOL: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_BOOL, {}); - tensor.scalar()() = *((bool*)ptr); - ptr += 1; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_BOOL, {}); 
+ tensor.scalar()() = *((bool*)*ptr); + *ptr += 1; + out_tensors->push_back(std::move(tensor)); break; } case STRING: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_STRING, {}); - tensor.scalar()() = std::string((char*)ptr, length); - ptr += length; - out_tensors.emplace_back(std::move(tensor)); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_STRING, {}); + tensor.scalar()() = std::string((char*)*ptr, length); + *ptr += length; + out_tensors->push_back(std::move(tensor)); break; } case DATE: { - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, {}); - tensor.scalar()() = *((int64_t*)ptr); - ptr += 8; - out_tensors.emplace_back(std::move(tensor)); + Tensor tensor(cpu_allocator(), DT_INT64, {}); + tensor.scalar()() = *((int64_t*)*ptr); + *ptr += 8; + out_tensors->push_back(std::move(tensor)); break; } case BYTE_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT8, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length})); - uint8_t* arr = (uint8_t*)ptr; - ptr += length; + uint8_t* arr = (uint8_t*)*ptr; + *ptr += length; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case SHORT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT16, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length})); - int16_t* arr = (int16_t*)ptr; - ptr += length * 2; + int16_t* arr = (int16_t*)*ptr; + *ptr += length * 2; - std::copy_n(arr, length, 
tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case INT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT32, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length})); - int32_t* arr = (int32_t*)ptr; - ptr += length * 4; + int32_t* arr = (int32_t*)*ptr; + *ptr += length * 4; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case LONG_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - int64_t* arr = (int64_t*)ptr; - ptr += length * 8; + int64_t* arr = (int64_t*)*ptr; + *ptr += length * 8; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case FLOAT_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_FLOAT, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length})); - float* arr = (float*)ptr; - ptr += 4 * length; + float* arr = (float*)*ptr; + *ptr += 4 * length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case DOUBLE_ARR: { - int32_t length = 
*((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_DOUBLE, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length})); - double* arr = (double*)ptr; - ptr += 8 * length; + double* arr = (double*)*ptr; + *ptr += 8 * length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case UCHAR_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_UINT16, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length})); - uint16_t* arr = (uint16_t*)ptr; - ptr += length * 2; + uint16_t* arr = (uint16_t*)*ptr; + *ptr += length * 2; - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case BOOL_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_BOOL, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length})); - bool* arr = (bool*)ptr; - ptr += length; + bool* arr = (bool*)*ptr; + *ptr += length; std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case STRING_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_STRING, - tensorflow::TensorShape({length})); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length})); for (int32_t 
i = 0; i < length; i++) { - int32_t str_length = *((int32_t*)ptr); - ptr += 4; - const int8_t* str = (const int8_t*)ptr; - ptr += str_length; + int32_t str_length = *((int32_t*)*ptr); + *ptr += 4; + const int8_t* str = (const int8_t*)*ptr; + *ptr += str_length; tensor.vec()(i) = std::string((char*)str, str_length); } - out_tensors.emplace_back(std::move(tensor)); + out_tensors->push_back(std::move(tensor)); break; } case DATE_ARR: { - int32_t length = *((int32_t*)ptr); - ptr += 4; - tensorflow::Tensor tensor(tensorflow::cpu_allocator(), - tensorflow::DT_INT64, - tensorflow::TensorShape({length})); - int64_t* arr = (int64_t*)ptr; - ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors.emplace_back(std::move(tensor)); + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); + int64_t* arr = (int64_t*)*ptr; + *ptr += length * 8; + + std::copy_n(arr, length, tensor.flat().data()); + out_tensors->push_back(std::move(tensor)); break; } case WRAPPED_OBJ: { - int32_t byte_arr_size = *((int32_t*)ptr); - ptr += 4; + int32_t byte_arr_size = *((int32_t*)*ptr); + *ptr += 4; - tensorflow::Status status = Parse(ptr, out_tensors, types); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); - int32_t offset = *((int32_t*)ptr); - ptr += 4; + int32_t offset = *((int32_t*)*ptr); + *ptr += 4; break; } case COMPLEX_OBJ: { - uint8_t version = *ptr; - ptr += 1; - int16_t flags = *((int16_t*)ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 - ptr += 2; - int32_t type_id = *((int32_t*)ptr); - ptr += 4; - int32_t hash_code = *((int32_t*)ptr); - ptr += 4; - int32_t length = *((int32_t*)ptr); - ptr += 4; - int32_t schema_id = *((int32_t*)ptr); - ptr += 4; - int32_t schema_offset = *((int32_t*)ptr); - ptr += 4; - - uint8_t* end = ptr + schema_offset - 24; + uint8_t version = **ptr; + *ptr += 1; + int16_t flags = *((int16_t*)*ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 + *ptr += 2; + 
int32_t type_id = *((int32_t*)*ptr); + *ptr += 4; + int32_t hash_code = *((int32_t*)*ptr); + *ptr += 4; + int32_t length = *((int32_t*)*ptr); + *ptr += 4; + int32_t schema_id = *((int32_t*)*ptr); + *ptr += 4; + int32_t schema_offset = *((int32_t*)*ptr); + *ptr += 4; + + uint8_t* end = *ptr + schema_offset - 24; int32_t i = 0; - while (ptr < end) { + while (*ptr < end) { i++; - tensorflow::Status status = Parse(ptr, out_tensors, types); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); } - ptr += (length - schema_offset); + *ptr += (length - schema_offset); break; } default: { - return tensorflow::errors::Internal("Unknowd binary type (type id ", - (int)object_type_id, ")"); + return errors::Internal("Unknowd binary type (type id ", + (int)object_type_id, ")"); } } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h index 1e845cbc56..9accbd796f 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -17,13 +17,12 @@ limitations under the License. 
#include "tensorflow/core/framework/dataset.h" #include "tensorflow/core/lib/core/status.h" -namespace ignite { +namespace tensorflow { class BinaryObjectParser { public: - tensorflow::Status Parse(uint8_t*& ptr, - std::vector& out_tensors, - std::vector& types); + Status Parse(uint8_t** ptr, std::vector* out_tensors, + std::vector* types); }; enum ObjectType { @@ -51,4 +50,4 @@ enum ObjectType { COMPLEX_OBJ = 103 }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.cc b/tensorflow/contrib/ignite/kernels/ignite_client.cc deleted file mode 100644 index 5a8eddb944..0000000000 --- a/tensorflow/contrib/ignite/kernels/ignite_client.cc +++ /dev/null @@ -1,55 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H -#include "ignite_client.h" -#endif - -namespace ignite { - -tensorflow::Status Client::ReadByte(uint8_t& data) { - return ReadData((uint8_t*)&data, 1); -} - -tensorflow::Status Client::ReadShort(int16_t& data) { - return ReadData((uint8_t*)&data, 2); -} - -tensorflow::Status Client::ReadInt(int32_t& data) { - return ReadData((uint8_t*)&data, 4); -} - -tensorflow::Status Client::ReadLong(int64_t& data) { - return ReadData((uint8_t*)&data, 8); -} - -tensorflow::Status Client::WriteByte(uint8_t data) { - return WriteData((uint8_t*)&data, 1); -} - -tensorflow::Status Client::WriteShort(int16_t data) { - return WriteData((uint8_t*)&data, 2); -} - -tensorflow::Status Client::WriteInt(int32_t data) { - return WriteData((uint8_t*)&data, 4); -} - -tensorflow::Status Client::WriteLong(int64_t data) { - return WriteData((uint8_t*)&data, 8); -} - -} // namespace ignite diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h index 64e28d75f0..944b3fe184 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -13,28 +13,43 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ + #include "tensorflow/core/lib/core/status.h" -namespace ignite { +namespace tensorflow { class Client { public: - virtual tensorflow::Status Connect() = 0; - virtual tensorflow::Status Disconnect() = 0; + virtual Status Connect() = 0; + virtual Status Disconnect() = 0; virtual bool IsConnected() = 0; virtual int GetSocketDescriptor() = 0; + virtual Status ReadData(uint8_t* buf, int32_t length) = 0; + virtual Status WriteData(uint8_t* buf, int32_t length) = 0; + + inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); } + + inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); } + + inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); } + + inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); } - virtual tensorflow::Status ReadByte(uint8_t& data); - virtual tensorflow::Status ReadShort(int16_t& data); - virtual tensorflow::Status ReadInt(int32_t& data); - virtual tensorflow::Status ReadLong(int64_t& data); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length) = 0; - - virtual tensorflow::Status WriteByte(uint8_t data); - virtual tensorflow::Status WriteShort(int16_t data); - virtual tensorflow::Status WriteInt(int32_t data); - virtual tensorflow::Status WriteLong(int64_t data); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length) = 0; + inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); } + + inline Status WriteShort(int16_t data) { + return WriteData((uint8_t*)&data, 2); + } + + inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); } + + inline Status WriteLong(int64_t data) { + return WriteData((uint8_t*)&data, 8); + } }; -} // namespace ignite +} // namespace tensorflow + +#endif diff --git 
a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc index a9bf26955b..f25f8a5b18 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -16,31 +16,29 @@ limitations under the License. #include "ignite_dataset_iterator.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { -IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, - std::string cache_name, std::string host, - tensorflow::int32 port, bool local, - tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, +IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name, + std::string host, int32 port, bool local, + int32 part, int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) - : DatasetBase(tensorflow::DatasetContext(ctx)), - cache_name(cache_name), - host(host), - port(port), - local(local), - part(part), - page_size(page_size), - username(username), - password(password), - certfile(certfile), - keyfile(keyfile), - cert_password(cert_password), - schema(schema), - permutation(permutation) { + std::vector schema, + std::vector permutation) + : DatasetBase(DatasetContext(ctx)), + cache_name_(cache_name), + host_(host), + port_(port), + local_(local), + part_(part), + page_size_(page_size), + username_(username), + password_(password), + certfile_(certfile), + keyfile_(keyfile), + cert_password_(cert_password), + schema_(schema), + permutation_(permutation) { SchemaToTypes(); SchemaToShapes(); @@ -53,55 +51,50 @@ IgniteDataset::IgniteDataset(tensorflow::OpKernelContext* ctx, IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } -std::unique_ptr IgniteDataset::MakeIteratorInternal( - const tensorflow::string& prefix) const { - return std::unique_ptr(new 
IgniteDatasetIterator( - {this, tensorflow::strings::StrCat(prefix, "::Ignite")}, this->host, - this->port, this->cache_name, this->local, this->part, this->page_size, - this->username, this->password, this->certfile, this->keyfile, - this->cert_password, this->schema, this->permutation)); +std::unique_ptr IgniteDataset::MakeIteratorInternal( + const string& prefix) const { + return std::unique_ptr(new IgniteDatasetIterator( + {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_, + this->cache_name_, this->local_, this->part_, this->page_size_, + this->username_, this->password_, this->certfile_, this->keyfile_, + this->cert_password_, this->schema_, this->permutation_)); } -const tensorflow::DataTypeVector& IgniteDataset::output_dtypes() const { - return dtypes; -} +const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; } -const std::vector& -IgniteDataset::output_shapes() const { - return shapes; +const std::vector& IgniteDataset::output_shapes() const { + return shapes_; } -tensorflow::string IgniteDataset::DebugString() const { - return "IgniteDatasetOp::Dataset"; -} +string IgniteDataset::DebugString() const { return "IgniteDatasetOp::Dataset"; } -tensorflow::Status IgniteDataset::AsGraphDefInternal( - tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, - tensorflow::Node** output) const { - return tensorflow::errors::Unimplemented( +Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const { + return errors::Unimplemented( "IgniteDataset does not support 'AsGraphDefInternal'"); } void IgniteDataset::SchemaToTypes() { - for (auto e : schema) { + for (auto e : schema_) { if (e == BYTE || e == BYTE_ARR) { - dtypes.push_back(tensorflow::DT_UINT8); + dtypes_.push_back(DT_UINT8); } else if (e == SHORT || e == SHORT_ARR) { - dtypes.push_back(tensorflow::DT_INT16); + dtypes_.push_back(DT_INT16); } else if (e == INT || e == INT_ARR) { - 
dtypes.push_back(tensorflow::DT_INT32); + dtypes_.push_back(DT_INT32); } else if (e == LONG || e == LONG_ARR) { - dtypes.push_back(tensorflow::DT_INT64); + dtypes_.push_back(DT_INT64); } else if (e == FLOAT || e == FLOAT_ARR) { - dtypes.push_back(tensorflow::DT_FLOAT); + dtypes_.push_back(DT_FLOAT); } else if (e == DOUBLE || e == DOUBLE_ARR) { - dtypes.push_back(tensorflow::DT_DOUBLE); + dtypes_.push_back(DT_DOUBLE); } else if (e == UCHAR || e == UCHAR_ARR) { - dtypes.push_back(tensorflow::DT_UINT8); + dtypes_.push_back(DT_UINT8); } else if (e == BOOL || e == BOOL_ARR) { - dtypes.push_back(tensorflow::DT_BOOL); + dtypes_.push_back(DT_BOOL); } else if (e == STRING || e == STRING_ARR) { - dtypes.push_back(tensorflow::DT_STRING); + dtypes_.push_back(DT_STRING); } else { LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; } @@ -109,15 +102,15 @@ void IgniteDataset::SchemaToTypes() { } void IgniteDataset::SchemaToShapes() { - for (auto e : schema) { + for (auto e : schema_) { if (e >= 1 && e < 10) { - shapes.push_back(tensorflow::PartialTensorShape({})); + shapes_.push_back(PartialTensorShape({})); } else if (e >= 12 && e < 21) { - shapes.push_back(tensorflow::PartialTensorShape({-1})); + shapes_.push_back(PartialTensorShape({-1})); } else { LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; } } } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h index 2120dfd342..d3fec5910b 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -15,51 +15,48 @@ limitations under the License. 
#include "tensorflow/core/framework/dataset.h" -namespace ignite { +namespace tensorflow { -class IgniteDataset : public tensorflow::DatasetBase { +class IgniteDataset : public DatasetBase { public: - IgniteDataset(tensorflow::OpKernelContext* ctx, std::string cache_name, - std::string host, tensorflow::int32 port, bool local, - tensorflow::int32 part, tensorflow::int32 page_size, + IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host, + int32 port, bool local, int32 part, int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, - std::string cert_password, - std::vector schema, - std::vector permutation); + std::string cert_password, std::vector schema, + std::vector permutation); ~IgniteDataset(); - std::unique_ptr MakeIteratorInternal( - const tensorflow::string& prefix) const override; - const tensorflow::DataTypeVector& output_dtypes() const override; - const std::vector& output_shapes() - const override; - tensorflow::string DebugString() const override; + std::unique_ptr MakeIteratorInternal( + const string& prefix) const override; + const DataTypeVector& output_dtypes() const override; + const std::vector& output_shapes() const override; + string DebugString() const override; protected: - tensorflow::Status AsGraphDefInternal( - tensorflow::SerializationContext* ctx, DatasetGraphDefBuilder* b, - tensorflow::Node** output) const override; + Status AsGraphDefInternal(SerializationContext* ctx, + DatasetGraphDefBuilder* b, + Node** output) const override; private: - const std::string cache_name; - const std::string host; - const tensorflow::int32 port; - const bool local; - const tensorflow::int32 part; - const tensorflow::int32 page_size; - const std::string username; - const std::string password; - const std::string certfile; - const std::string keyfile; - const std::string cert_password; - const std::vector schema; - const std::vector permutation; - - tensorflow::DataTypeVector dtypes; - 
std::vector shapes; + const std::string cache_name_; + const std::string host_; + const int32 port_; + const bool local_; + const int32 part_; + const int32 page_size_; + const std::string username_; + const std::string password_; + const std::string certfile_; + const std::string keyfile_; + const std::string cert_password_; + const std::vector schema_; + const std::vector permutation_; + + DataTypeVector dtypes_; + std::vector shapes_; void SchemaToTypes(); void SchemaToShapes(); }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc index 03cc3c1291..1774585ecd 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -22,270 +22,262 @@ limitations under the License. #include #include -namespace ignite { - -#define CHECK_STATUS(status) \ - if (!status.ok()) return status; +namespace tensorflow { IgniteDatasetIterator::IgniteDatasetIterator( - const Params& params, std::string host, tensorflow::int32 port, - std::string cache_name, bool local, tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, std::string password, - std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) - : tensorflow::DatasetIterator(params), - cache_name(cache_name), - local(local), - part(part), - page_size(page_size), - username(username), - password(password), - schema(schema), - permutation(permutation), - remainder(-1), - cursor_id(-1), - last_page(false) { + const Params& params, std::string host, int32 port, std::string cache_name, + bool local, int32 part, int32 page_size, std::string username, + std::string password, std::string certfile, std::string keyfile, + std::string cert_password, std::vector schema, + std::vector permutation) + : DatasetIterator(params), + 
cache_name_(cache_name), + local_(local), + part_(part), + page_size_(page_size), + username_(username), + password_(password), + schema_(schema), + permutation_(permutation), + remainder_(-1), + cursor_id_(-1), + last_page_(false) { Client* p_client = new PlainClient(host, port); if (certfile.empty()) - client = std::unique_ptr(p_client); + client_ = std::unique_ptr(p_client); else - client = std::unique_ptr(new SslWrapper( + client_ = std::unique_ptr(new SslWrapper( std::unique_ptr(p_client), certfile, keyfile, cert_password)); LOG(INFO) << "Ignite Dataset Iterator created"; } IgniteDatasetIterator::~IgniteDatasetIterator() { - tensorflow::Status status = CloseConnection(); + Status status = CloseConnection(); if (!status.ok()) LOG(ERROR) << status.ToString(); LOG(INFO) << "Ignite Dataset Iterator destroyed"; } -tensorflow::Status IgniteDatasetIterator::EstablishConnection() { - if (!client->IsConnected()) { - tensorflow::Status status = client->Connect(); +Status IgniteDatasetIterator::EstablishConnection() { + if (!client_->IsConnected()) { + Status status = client_->Connect(); if (!status.ok()) return status; status = Handshake(); if (!status.ok()) { - tensorflow::Status disconnect_status = client->Disconnect(); + Status disconnect_status = client_->Disconnect(); if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); return status; } } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::CloseConnection() { - if (cursor_id != -1 && !last_page) { - tensorflow::Status conn_status = EstablishConnection(); +Status IgniteDatasetIterator::CloseConnection() { + if (cursor_id_ != -1 && !last_page_) { + Status conn_status = EstablishConnection(); if (!conn_status.ok()) return conn_status; - CHECK_STATUS(client->WriteInt(18)); // Message length - CHECK_STATUS( - client->WriteShort(close_connection_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - 
CHECK_STATUS(client->WriteLong(cursor_id)); // Resource ID + TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length + TF_RETURN_IF_ERROR( + client_->WriteShort(close_connection_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Resource ID int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); if (res_len < 12) - return tensorflow::errors::Internal( - "Close Resource Response is corrupted"); + return errors::Internal("Close Resource Response is corrupted"); int64_t req_id; - CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Close Resource Error [status=", - status, ", message=", err_msg, "]"); + return errors::Internal("Close Resource Error [status=", status, + ", message=", err_msg, "]"); } - return tensorflow::errors::Internal("Close Resource Error [status=", - status, "]"); + return errors::Internal("Close Resource Error [status=", status, "]"); } - LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id = -1; + cursor_id_ = -1; - return client->Disconnect(); + return client_->Disconnect(); } 
else { - LOG(INFO) << "Query Cursor " << cursor_id << " is already closed"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is already closed"; } - return client->IsConnected() ? client->Disconnect() - : tensorflow::Status::OK(); + return client_->IsConnected() ? client_->Disconnect() : Status::OK(); } -tensorflow::Status IgniteDatasetIterator::GetNextInternal( - tensorflow::IteratorContext* ctx, - std::vector* out_tensors, bool* end_of_sequence) { - if (remainder == 0 && last_page) { - LOG(INFO) << "Query Cursor " << cursor_id << " is closed"; +Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) { + if (remainder_ == 0 && last_page_) { + LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id = -1; + cursor_id_ = -1; *end_of_sequence = true; - return tensorflow::Status::OK(); + return Status::OK(); } else { - tensorflow::Status status = EstablishConnection(); + Status status = EstablishConnection(); if (!status.ok()) return status; - if (remainder == -1 || remainder == 0) { - tensorflow::Status status = - remainder == -1 ? ScanQuery() : LoadNextPage(); + if (remainder_ == -1 || remainder_ == 0) { + Status status = remainder_ == -1 ? 
ScanQuery() : LoadNextPage(); if (!status.ok()) return status; } - uint8_t* initial_ptr = ptr; + uint8_t* initial_ptr = ptr_; std::vector types; - std::vector tensors; + std::vector tensors; - status = parser.Parse(ptr, tensors, types); // Parse key + status = parser_.Parse(&ptr_, &tensors, &types); // Parse key if (!status.ok()) return status; - status = parser.Parse(ptr, tensors, types); // Parse val + status = parser_.Parse(&ptr_, &tensors, &types); // Parse val if (!status.ok()) return status; - remainder -= (ptr - initial_ptr); + remainder_ -= (ptr_ - initial_ptr); out_tensors->resize(tensors.size()); for (int32_t i = 0; i < tensors.size(); i++) - (*out_tensors)[permutation[i]] = std::move(tensors[i]); + (*out_tensors)[permutation_[i]] = std::move(tensors[i]); *end_of_sequence = false; - return tensorflow::Status::OK(); + return Status::OK(); } *end_of_sequence = true; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::SaveInternal( - tensorflow::IteratorStateWriter* writer) { - return tensorflow::errors::Unimplemented( +Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { + return errors::Unimplemented( "Iterator for IgniteDataset does not support 'SaveInternal'"); } -tensorflow::Status IgniteDatasetIterator::RestoreInternal( - tensorflow::IteratorContext* ctx, tensorflow::IteratorStateReader* reader) { - return tensorflow::errors::Unimplemented( +Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) { + return errors::Unimplemented( "Iterator for IgniteDataset does not support 'RestoreInternal')"); } -tensorflow::Status IgniteDatasetIterator::Handshake() { +Status IgniteDatasetIterator::Handshake() { int32_t msg_len = 8; - if (username.empty()) + if (username_.empty()) msg_len += 1; else - msg_len += 5 + username.length(); + msg_len += 5 + username_.length(); - if (password.empty()) + if (password_.empty()) msg_len += 1; else - msg_len += 5 + 
password.length(); - - CHECK_STATUS(client->WriteInt(msg_len)); - CHECK_STATUS(client->WriteByte(1)); - CHECK_STATUS(client->WriteShort(protocol_major_version)); - CHECK_STATUS(client->WriteShort(protocol_minor_version)); - CHECK_STATUS(client->WriteShort(protocol_patch_version)); - CHECK_STATUS(client->WriteByte(2)); - if (username.empty()) { - CHECK_STATUS(client->WriteByte(null_val)); + msg_len += 5 + password_.length(); + + TF_RETURN_IF_ERROR(client_->WriteInt(msg_len)); + TF_RETURN_IF_ERROR(client_->WriteByte(1)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version)); + TF_RETURN_IF_ERROR(client_->WriteByte(2)); + if (username_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); } else { - CHECK_STATUS(client->WriteByte(string_val)); - CHECK_STATUS(client->WriteInt(username.length())); - CHECK_STATUS( - client->WriteData((uint8_t*)username.c_str(), username.length())); + TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteInt(username_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData((uint8_t*)username_.c_str(), username_.length())); } - if (password.empty()) { - CHECK_STATUS(client->WriteByte(null_val)); + if (password_.empty()) { + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); } else { - CHECK_STATUS(client->WriteByte(string_val)); - CHECK_STATUS(client->WriteInt(password.length())); - CHECK_STATUS( - client->WriteData((uint8_t*)password.c_str(), password.length())); + TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteInt(password_.length())); + TF_RETURN_IF_ERROR( + client_->WriteData((uint8_t*)password_.c_str(), password_.length())); } int32_t handshake_res_len; - CHECK_STATUS(client->ReadInt(handshake_res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&handshake_res_len)); uint8_t handshake_res; - 
CHECK_STATUS(client->ReadByte(handshake_res)); + TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res)); LOG(INFO) << "Handshake length " << handshake_res_len << ", res " << (int16_t)handshake_res; if (handshake_res != 1) { int16_t serv_ver_major; - CHECK_STATUS(client->ReadShort(serv_ver_major)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major)); int16_t serv_ver_minor; - CHECK_STATUS(client->ReadShort(serv_ver_minor)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_minor)); int16_t serv_ver_patch; - CHECK_STATUS(client->ReadShort(serv_ver_patch)); + TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_patch)); uint8_t header; - CHECK_STATUS(client->ReadByte(header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&header)); if (header == string_val) { int32_t length; - CHECK_STATUS(client->ReadInt(length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&length)); uint8_t* err_msg_c = new uint8_t[length]; - CHECK_STATUS(client->ReadData(err_msg_c, length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length)); std::string err_msg((char*)err_msg_c, length); delete[] err_msg_c; - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, - ", message='", err_msg, "']"); + return errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, ", message='", err_msg, + "']"); } else if (header == null_val) { - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + return errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } else { - return tensorflow::errors::Internal( - "Handshake Error [result=", handshake_res, ", version=", - serv_ver_major, ".", serv_ver_minor, ".", serv_ver_patch, "]"); + return 
errors::Internal("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::ScanQuery() { - CHECK_STATUS(client->WriteInt(25)); // Message length - CHECK_STATUS(client->WriteShort(scan_query_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - CHECK_STATUS(client->WriteInt(JavaHashCode(cache_name))); // Cache name - CHECK_STATUS(client->WriteByte(0)); // Flags - CHECK_STATUS(client->WriteByte(null_val)); // Filter object - CHECK_STATUS(client->WriteInt(page_size)); // Cursor page size - CHECK_STATUS(client->WriteInt(part)); // Partition to query - CHECK_STATUS(client->WriteByte(local)); // Local flag +Status IgniteDatasetIterator::ScanQuery() { + TF_RETURN_IF_ERROR(client_->WriteInt(25)); // Message length + TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR( + client_->WriteInt(JavaHashCode(cache_name_))); // Cache name + TF_RETURN_IF_ERROR(client_->WriteByte(0)); // Flags + TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); // Filter object + TF_RETURN_IF_ERROR(client_->WriteInt(page_size_)); // Cursor page size + TF_RETURN_IF_ERROR(client_->WriteInt(part_)); // part_ition to query + TF_RETURN_IF_ERROR(client_->WriteByte(local_)); // local_ flag int64_t wait_start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); int64_t wait_stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) @@ -293,82 +285,81 @@ tensorflow::Status IgniteDatasetIterator::ScanQuery() { LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; - if (res_len < 12) - return 
tensorflow::errors::Internal("Scan Query Response is corrupted"); + if (res_len < 12) return errors::Internal("Scan Query Response is corrupted"); int64_t req_id; - CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Scan Query Error [status=", status, - ", message=", err_msg, "]"); + return errors::Internal("Scan Query Error [status=", status, ", message=", + err_msg, "]"); } - return tensorflow::errors::Internal("Scan Query Error [status=", status, - "]"); + return errors::Internal("Scan Query Error [status=", status, "]"); } - CHECK_STATUS(client->ReadLong(cursor_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_)); - LOG(INFO) << "Query Cursor " << cursor_id << " is opened"; + LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened"; int32_t row_cnt; - CHECK_STATUS(client->ReadInt(row_cnt)); + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder = res_len - 25; - page = std::unique_ptr(new uint8_t[remainder]); - ptr = page.get(); + remainder_ = res_len - 25; + page_ = std::unique_ptr(new uint8_t[remainder_]); + ptr_ = page_.get(); int64_t start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); - CHECK_STATUS(client->ReadData(ptr, remainder)); + 
TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); int64_t stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); ; - double size_in_mb = 1.0 * remainder / 1024 / 1024; + double size_in_mb = 1.0 * remainder_ / 1024 / 1024; double time_in_s = 1.0 * (stop - start) / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; uint8_t last_page_b; - CHECK_STATUS(client->ReadByte(last_page_b)); + TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - last_page = !last_page_b; + last_page_ = !last_page_b; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status IgniteDatasetIterator::LoadNextPage() { - CHECK_STATUS(client->WriteInt(18)); // Message length - CHECK_STATUS(client->WriteShort(load_next_page_opcode)); // Operation code - CHECK_STATUS(client->WriteLong(0)); // Request ID - CHECK_STATUS(client->WriteLong(cursor_id)); // Cursor ID +Status IgniteDatasetIterator::LoadNextPage() { + TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length + TF_RETURN_IF_ERROR( + client_->WriteShort(load_next_page_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Cursor ID int64_t wait_start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); int32_t res_len; - CHECK_STATUS(client->ReadInt(res_len)); + TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); int64_t wait_stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) @@ -377,66 +368,65 @@ tensorflow::Status IgniteDatasetIterator::LoadNextPage() { LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; if (res_len < 12) - return tensorflow::errors::Internal("Load Next Page Response is corrupted"); + return errors::Internal("Load Next Page Response is corrupted"); int64_t req_id; - 
CHECK_STATUS(client->ReadLong(req_id)); + TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); int32_t status; - CHECK_STATUS(client->ReadInt(status)); + TF_RETURN_IF_ERROR(client_->ReadInt(&status)); if (status != 0) { uint8_t err_msg_header; - CHECK_STATUS(client->ReadByte(err_msg_header)); + TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); if (err_msg_header == string_val) { int32_t err_msg_length; - CHECK_STATUS(client->ReadInt(err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; - CHECK_STATUS(client->ReadData(err_msg_c, err_msg_length)); + TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); std::string err_msg((char*)err_msg_c, err_msg_length); delete[] err_msg_c; - return tensorflow::errors::Internal("Load Next Page Error [status=", - status, ", message=", err_msg, "]"); + return errors::Internal("Load Next Page Error [status=", status, + ", message=", err_msg, "]"); } - return tensorflow::errors::Internal("Load Next Page Error [status=", status, - "]"); + return errors::Internal("Load Next Page Error [status=", status, "]"); } int32_t row_cnt; - CHECK_STATUS(client->ReadInt(row_cnt)); + TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder = res_len - 17; - page = std::unique_ptr(new uint8_t[remainder]); - ptr = page.get(); + remainder_ = res_len - 17; + page_ = std::unique_ptr(new uint8_t[remainder_]); + ptr_ = page_.get(); int64_t start = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); - CHECK_STATUS(client->ReadData(ptr, remainder)); + TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); int64_t stop = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch()) .count(); ; - double size_in_mb = 1.0 * remainder / 1024 / 1024; + double size_in_mb = 1.0 * remainder_ / 1024 / 1024; double time_in_s = 1.0 * (stop - start) / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 
1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; uint8_t last_page_b; - CHECK_STATUS(client->ReadByte(last_page_b)); + TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - last_page = !last_page_b; + last_page_ = !last_page_b; - return tensorflow::Status::OK(); + return Status::OK(); } -int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { +int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const { int32_t h = 0; for (char& c : str) { h = 31 * h + c; @@ -444,4 +434,4 @@ int32_t IgniteDatasetIterator::JavaHashCode(std::string str) { return h; } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h index d1df4527f9..5858dbfcb9 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -14,65 +14,55 @@ limitations under the License. ==============================================================================*/ #include "ignite_binary_object_parser.h" -#include "ignite_dataset.h" - -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif +#include "ignite_dataset.h" -namespace ignite { +namespace tensorflow { -class IgniteDatasetIterator - : public tensorflow::DatasetIterator { +class IgniteDatasetIterator : public DatasetIterator { public: - IgniteDatasetIterator(const Params& params, std::string host, - tensorflow::int32 port, std::string cache_name, - bool local, tensorflow::int32 part, - tensorflow::int32 page_size, std::string username, + IgniteDatasetIterator(const Params& params, std::string host, int32 port, + std::string cache_name, bool local, int32 part, + int32 page_size, std::string username, std::string password, std::string certfile, std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation); + std::vector schema, + std::vector 
permutation); ~IgniteDatasetIterator(); - tensorflow::Status GetNextInternal( - tensorflow::IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) override; + Status GetNextInternal(IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) override; protected: - tensorflow::Status SaveInternal( - tensorflow::IteratorStateWriter* writer) override; - tensorflow::Status RestoreInternal( - tensorflow::IteratorContext* ctx, - tensorflow::IteratorStateReader* reader) override; + Status SaveInternal(IteratorStateWriter* writer) override; + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override; private: - std::unique_ptr client; - BinaryObjectParser parser; + std::unique_ptr client_; + BinaryObjectParser parser_; - const std::string cache_name; - const bool local; - const tensorflow::int32 part; - const tensorflow::int32 page_size; - const std::string username; - const std::string password; - const std::vector schema; - const std::vector permutation; + const std::string cache_name_; + const bool local_; + const int32 part_; + const int32 page_size_; + const std::string username_; + const std::string password_; + const std::vector schema_; + const std::vector permutation_; - int32_t remainder; - int64_t cursor_id; - bool last_page; + int32_t remainder_; + int64_t cursor_id_; + bool last_page_; - std::unique_ptr page; - uint8_t* ptr; + std::unique_ptr page_; + uint8_t* ptr_; - tensorflow::Status EstablishConnection(); - tensorflow::Status CloseConnection(); - tensorflow::Status Handshake(); - tensorflow::Status ScanQuery(); - tensorflow::Status LoadNextPage(); - int32_t JavaHashCode(std::string str); + Status EstablishConnection(); + Status CloseConnection(); + Status Handshake(); + Status ScanQuery(); + Status LoadNextPage(); + int32_t JavaHashCode(std::string str) const; }; constexpr uint8_t null_val = 101; @@ -84,4 +74,4 @@ constexpr int16_t scan_query_opcode = 2000; constexpr int16_t load_next_page_opcode 
= 2001; constexpr int16_t close_connection_opcode = 0; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index 543b5e4afc..89eecf9c14 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/framework/dataset.h" namespace tensorflow { +namespace { class IgniteDatasetOp : public DatasetOpKernel { public: @@ -132,14 +133,15 @@ class IgniteDatasetOp : public DatasetOpKernel { permutation.push_back(permutation_tensor->flat()(i)); } - *output = new ignite::IgniteDataset( - ctx, cache_name, host, port, local, part, page_size, username, password, - certfile, keyfile, cert_password, std::move(schema), - std::move(permutation)); + *output = + new IgniteDataset(ctx, cache_name, host, port, local, part, page_size, + username, password, certfile, keyfile, cert_password, + std::move(schema), std::move(permutation)); } }; REGISTER_KERNEL_BUILDER(Name("IgniteDataset").Device(DEVICE_CPU), IgniteDatasetOp); +} // namespace } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 5491af68d6..6f417a3cb5 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -13,31 +13,28 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif #include -namespace ignite { +namespace tensorflow { class PlainClient : public Client { public: PlainClient(std::string host, int port); ~PlainClient(); - virtual tensorflow::Status Connect(); - virtual tensorflow::Status Disconnect(); + virtual Status Connect(); + virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, int32_t length); + virtual Status WriteData(uint8_t* buf, int32_t length); private: - std::string host; - int port; - int sock; + const std::string host_; + const int port_; + int sock_; }; -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc index dbfa4f8786..a4c58a9563 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -29,104 +29,98 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { PlainClient::PlainClient(std::string host, int port) - : host(host), port(port), sock(-1) {} + : host_(host), port_(port), sock_(-1) {} PlainClient::~PlainClient() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } } -tensorflow::Status PlainClient::Connect() { - if (sock == -1) { - sock = socket(AF_INET, SOCK_STREAM, 0); - if (sock == -1) - return tensorflow::errors::Internal("Failed to create socket"); +Status PlainClient::Connect() { + if (sock_ == -1) { + sock_ = socket(AF_INET, SOCK_STREAM, 0); + if (sock_ == -1) return errors::Internal("Failed to create socket"); } sockaddr_in server; - server.sin_addr.s_addr = inet_addr(host.c_str()); + server.sin_addr.s_addr = inet_addr(host_.c_str()); if (server.sin_addr.s_addr == -1) { hostent* he; in_addr** addr_list; - if ((he = gethostbyname(host.c_str())) == NULL) - return tensorflow::errors::Internal("Failed to resolve hostname \"", host, - "\""); + if ((he = gethostbyname(host_.c_str())) == NULL) + return errors::Internal("Failed to resolve hostname \"", host_, "\""); addr_list = (in_addr**)he->h_addr_list; if (addr_list[0] != NULL) server.sin_addr = *addr_list[0]; } server.sin_family = AF_INET; - server.sin_port = htons(port); + server.sin_port = htons(port_); - if (connect(sock, (sockaddr*)&server, sizeof(server)) < 0) - return tensorflow::errors::Internal("Failed to connect to \"", host, ":", - port, "\""); + if (connect(sock_, (sockaddr*)&server, sizeof(server)) < 0) + return errors::Internal("Failed to connect to \"", host_, ":", port_, "\""); - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status 
PlainClient::Disconnect() { - int close_res = close(sock); - sock = -1; +Status PlainClient::Disconnect() { + int close_res = close(sock_); + sock_ = -1; - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" is closed"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" is closed"; - return close_res == 0 ? tensorflow::Status::OK() - : tensorflow::errors::Internal( - "Failed to correctly close connection"); + return close_res == 0 + ? Status::OK() + : errors::Internal("Failed to correctly close connection"); } -bool PlainClient::IsConnected() { return sock != -1; } +bool PlainClient::IsConnected() { return sock_ != -1; } -int PlainClient::GetSocketDescriptor() { return sock; } +int PlainClient::GetSocketDescriptor() { return sock_; } -tensorflow::Status PlainClient::ReadData(uint8_t* buf, int32_t length) { +Status PlainClient::ReadData(uint8_t* buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock, buf, length - recieved, 0); + int res = recv(sock_, buf, length - recieved, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from socket: ", res, ", ", - std::string(strerror(errno))); + return errors::Internal("Error occured while reading from socket: ", res, + ", ", std::string(strerror(errno))); - if (res == 0) - return tensorflow::errors::Internal("Server closed connection"); + if (res == 0) return errors::Internal("Server closed connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::WriteData(uint8_t* buf, int32_t length) { +Status PlainClient::WriteData(uint8_t* buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock, buf, length - sent, 0); + int res = send(sock_, buf, length - sent, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res, ", ", - std::string(strerror(errno))); + return 
errors::Internal("Error occured while writing into socket: ", res, + ", ", std::string(strerror(errno))); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index f78c9b3627..7ba037f2d2 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -27,48 +27,45 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" -namespace ignite { +namespace tensorflow { PlainClient::PlainClient(std::string host, int port) - : host(host), port(port), sock(INVALID_SOCKET) {} + : host_(host), port_(port), sock_(INVALID_SOCKET) {} PlainClient::~PlainClient() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } } -tensorflow::Status PlainClient::Connect() { +Status PlainClient::Connect() { WSADATA wsaData; addrinfo *result = NULL, *ptr = NULL, hints; int res = WSAStartup(MAKEWORD(2, 2), &wsaData); - if (res != 0) - return tensorflow::errors::Internal("WSAStartup failed with error: ", res); + if (res != 0) return errors::Internal("WSAStartup failed with error: ", res); ZeroMemory(&hints, sizeof(hints)); hints.ai_family = AF_UNSPEC; hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; - res = - getaddrinfo(host.c_str(), std::to_string(port).c_str(), &hints, &result); - if (res != 0) - return tensorflow::errors::Internal("Getaddrinfo failed with error: ", res); + res = getaddrinfo(host_.c_str(), std::to_string(port_).c_str(), &hints, + &result); + if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { - sock = 
socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); - if (sock == INVALID_SOCKET) { + sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); + if (sock_ == INVALID_SOCKET) { WSACleanup(); - return tensorflow::errors::Internal("Socket failed with error: ", - WSAGetLastError()); + return errors::Internal("Socket failed with error: ", WSAGetLastError()); } - res = connect(sock, ptr->ai_addr, (int)ptr->ai_addrlen); + res = connect(sock_, ptr->ai_addr, (int)ptr->ai_addrlen); if (res == SOCKET_ERROR) { - closesocket(sock); - sock = INVALID_SOCKET; + closesocket(sock_); + sock_ = INVALID_SOCKET; continue; } @@ -77,67 +74,63 @@ tensorflow::Status PlainClient::Connect() { freeaddrinfo(result); - if (sock == INVALID_SOCKET) { + if (sock_ == INVALID_SOCKET) { WSACleanup(); - return tensorflow::errors::Internal("Unable to connect to server"); + return errors::Internal("Unable to connect to server"); } - LOG(INFO) << "Connection to \"" << host << ":" << port << "\" established"; + LOG(INFO) << "Connection to \"" << host_ << ":" << port_ << "\" established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::Disconnect() { - int res = shutdown(sock, SD_SEND); - closesocket(sock); +Status PlainClient::Disconnect() { + int res = shutdown(sock_, SD_SEND); + closesocket(sock_); WSACleanup(); if (res == SOCKET_ERROR) - return tensorflow::errors::Internal("Shutdown failed with error: ", - WSAGetLastError()); + return errors::Internal("Shutdown failed with error: ", WSAGetLastError()); else - return tensorflow::Status::OK(); + return Status::OK(); } -bool PlainClient::IsConnected() { return sock != INVALID_SOCKET; } +bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; } -int PlainClient::GetSocketDescriptor() { return sock; } +int PlainClient::GetSocketDescriptor() { return sock_; } -tensorflow::Status PlainClient::ReadData(uint8_t *buf, int32_t length) { +Status PlainClient::ReadData(uint8_t *buf, int32_t 
length) { int recieved = 0; while (recieved < length) { - int res = recv(sock, buf, length - recieved, 0); + int res = recv(sock_, buf, length - recieved, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from socket: ", res); + return errors::Internal("Error occured while reading from socket: ", res); - if (res == 0) - return tensorflow::errors::Internal("Server closed connection"); + if (res == 0) return errors::Internal("Server closed connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status PlainClient::WriteData(uint8_t *buf, int32_t length) { +Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock, buf, length - sent, 0); + int res = send(sock_, buf, length - sent, 0); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res); + return errors::Internal("Error occured while writing into socket: ", res); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc index a1101b91f3..a2bc6b9609 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -21,7 +21,7 @@ limitations under the License. 
#include #include -namespace ignite { +namespace tensorflow { static int PasswordCb(char *buf, int size, int rwflag, void *password) { strncpy(buf, (char *)(password), size); @@ -31,119 +31,112 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) { SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, std::string keyfile, std::string cert_password) - : client(client), - certfile(certfile), - keyfile(keyfile), - cert_password(cert_password), - ctx(NULL) {} + : client_(client), + certfile_(certfile), + keyfile_(keyfile), + cert_password_(cert_password), + ctx_(NULL) {} SslWrapper::~SslWrapper() { if (IsConnected()) { - tensorflow::Status status = Disconnect(); + Status status = Disconnect(); if (!status.ok()) LOG(WARNING) << status.ToString(); } - if (ctx != NULL) { - SSL_CTX_free(ctx); - ctx = NULL; + if (ctx_ != NULL) { + SSL_CTX_free(ctx_); + ctx_ = NULL; } } -tensorflow::Status SslWrapper::InitSslContext() { +Status SslWrapper::InitSslContext() { OpenSSL_add_all_algorithms(); SSL_load_error_strings(); - ctx = SSL_CTX_new(SSLv23_method()); - if (ctx == NULL) - return tensorflow::errors::Internal("Couldn't create SSL context"); + ctx_ = SSL_CTX_new(SSLv23_method()); + if (ctx_ == NULL) return errors::Internal("Couldn't create SSL context"); - SSL_CTX_set_default_passwd_cb(ctx, PasswordCb); - SSL_CTX_set_default_passwd_cb_userdata(ctx, (void *)cert_password.c_str()); + SSL_CTX_set_default_passwd_cb(ctx_, PasswordCb); + SSL_CTX_set_default_passwd_cb_userdata(ctx_, (void *)cert_password_.c_str()); - if (SSL_CTX_use_certificate_chain_file(ctx, certfile.c_str()) != 1) - return tensorflow::errors::Internal( - "Couldn't load cetificate chain (file '", certfile, "')"); + if (SSL_CTX_use_certificate_chain_file(ctx_, certfile_.c_str()) != 1) + return errors::Internal("Couldn't load cetificate chain (file '", certfile_, + "')"); - std::string private_key_file = keyfile.empty() ? 
certfile : keyfile; - if (SSL_CTX_use_PrivateKey_file(ctx, private_key_file.c_str(), + std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_; + if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(), SSL_FILETYPE_PEM) != 1) - return tensorflow::errors::Internal("Couldn't load private key (file '", - private_key_file, "')"); + return errors::Internal("Couldn't load private key (file '", + private_key_file, "')"); - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::Connect() { - tensorflow::Status status; - - if (ctx == NULL) { - status = InitSslContext(); - if (!status.ok()) return status; +Status SslWrapper::Connect() { + if (ctx_ == NULL) { + TF_RETURN_IF_ERROR(InitSslContext()); } - ssl = SSL_new(ctx); - if (ssl == NULL) - return tensorflow::errors::Internal("Failed to establish SSL connection"); + ssl_ = SSL_new(ctx_); + if (ssl_ == NULL) + return errors::Internal("Failed to establish SSL connection"); - status = client->Connect(); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(client_->Connect()); - SSL_set_fd(ssl, client->GetSocketDescriptor()); - if (SSL_connect(ssl) != 1) - return tensorflow::errors::Internal("Failed to establish SSL connection"); + SSL_set_fd(ssl_, client_->GetSocketDescriptor()); + if (SSL_connect(ssl_) != 1) + return errors::Internal("Failed to establish SSL connection"); LOG(INFO) << "SSL connection established"; - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::Disconnect() { - SSL_free(ssl); +Status SslWrapper::Disconnect() { + SSL_free(ssl_); LOG(INFO) << "SSL connection closed"; - return client->Disconnect(); + return client_->Disconnect(); } -bool SslWrapper::IsConnected() { return client->IsConnected(); } +bool SslWrapper::IsConnected() { return client_->IsConnected(); } -int SslWrapper::GetSocketDescriptor() { return client->GetSocketDescriptor(); } +int SslWrapper::GetSocketDescriptor() { return 
client_->GetSocketDescriptor(); } -tensorflow::Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { +Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = SSL_read(ssl, buf, length - recieved); + int res = SSL_read(ssl_, buf, length - recieved); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while reading from SSL socket: ", res); + return errors::Internal("Error occured while reading from SSL socket: ", + res); - if (res == 0) - return tensorflow::errors::Internal("Server closed SSL connection"); + if (res == 0) return errors::Internal("Server closed SSL connection"); recieved += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -tensorflow::Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { +Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = SSL_write(ssl, buf, length - sent); + int res = SSL_write(ssl_, buf, length - sent); if (res < 0) - return tensorflow::errors::Internal( - "Error occured while writing into socket: ", res); + return errors::Internal("Error occured while writing into socket: ", res); sent += res; buf += res; } - return tensorflow::Status::OK(); + return Status::OK(); } -} // namespace ignite +} // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index e0c2a242dc..bbba6cc181 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -13,15 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef IGNITE_CLIENT_H -#define IGNITE_CLIENT_H #include "ignite_client.h" -#endif #include #include -namespace ignite { +namespace tensorflow { class SslWrapper : public Client { public: @@ -29,21 +26,22 @@ class SslWrapper : public Client { std::string keyfile, std::string cert_password); ~SslWrapper(); - virtual tensorflow::Status Connect(); - virtual tensorflow::Status Disconnect(); + virtual Status Connect(); + virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual tensorflow::Status ReadData(uint8_t* buf, int32_t length); - virtual tensorflow::Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, int32_t length); + virtual Status WriteData(uint8_t* buf, int32_t length); private: - std::shared_ptr client; - std::string certfile; - std::string keyfile; - std::string cert_password; - SSL_CTX* ctx; - SSL* ssl; - tensorflow::Status InitSslContext(); + std::shared_ptr client_; + std::string certfile_; + std::string keyfile_; + std::string cert_password_; + SSL_CTX* ctx_; + SSL* ssl_; + + Status InitSslContext(); }; -} // namespace ignite +} // namespace tensorflow -- GitLab From 1408a1563e73e69f68c1eb6f34a0976c7c950ad9 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 28 Aug 2018 11:32:57 +0300 Subject: [PATCH 030/570] Update README.md. 
--- tensorflow/contrib/ignite/README.md | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/ignite/README.md b/tensorflow/contrib/ignite/README.md index f2596fc572..8fec4066c4 100644 --- a/tensorflow/contrib/ignite/README.md +++ b/tensorflow/contrib/ignite/README.md @@ -13,19 +13,20 @@ ## Overview [Apache Ignite](https://ignite.apache.org/) is a memory-centric distributed database, caching, and processing platform for -transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a datasource for neural network training, inference and all other computations supported by TensorFlow. +transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an integration between Apache Ignite and TensorFlow. The integration is based on [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) from TensorFlow side and [Binary Client Protocol](https://apacheignite.readme.io/v2.6/docs/binary-client-protocol) from Apache Ignite side. It allows to use Apache Ignite as a data source for neural network training, inference and all other computations supported by TensorFlow. ## Features -Ignite Dataset provides a set of features that makes it possible to use it in a wide range of cases. The most important and interesting features are described below. +Ignite Dataset provides features that you can use in a wide range of cases. The most important and interesting features are described below.
### Distributed In-Memory Datasource -[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that allows to avoid limitations of hard drive and provide high reading speed and ability to store and operate with as much data as you need in distributed cluster. Using of Ignite Dataset makes it possible to utilize all these advantages. +[Apache Ignite](https://ignite.apache.org/) is a distributed in-memory database, caching, and processing platform that provides fast data access. It allows you to avoid limitations of hard drive and store and operate with as much data as you need in distributed cluster. You can utilize +these benefits of Apache Ignite by using Ignite Dataset. Moreover, Ignite Dataset can be used for the following use-cases: - If you have a **gigabyte** of data you can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations. At the same time, you can store your data in Apache Ignite on the same machine and use it as a datasource for TensorFlow and thus avoid these limitations. - If you have a **terabyte** of data you probably still can keep it on a single machine on a hard drive, but you will face with hard drive speed limitations again. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow and thus avoid these limitations. - If you have a **petabyte** of data you can't keep it on a single machine. At the same time, you can store your data in Apache Ignite distributed in-memory cluster and use it as a datasource for TensorFlow. -It's important that Apache Ignite is not just a step of ETL pipeline between database or data warehouse and TensorFlow. Apache Ignite is a high-grade database itself.
Choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, in the same time, an ability to use this data for neural network training and inference. +Note that Apache Ignite is not just a step of ETL pipeline between a database or a data warehouse and TensorFlow. Apache Ignite is a high-grade database itself. By choosing Apache Ignite and TensorFlow you are getting everything you need to work with operational or historical data and, at the same time, an ability to use this data for neural network training and inference. ```bash $ apache-ignite-fabric/bin/ignite.sh @@ -55,7 +56,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL ``` ### Structured Objects -[Apache Ignite](https://ignite.apache.org/) allows to store any objects you would like to store. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects. +[Apache Ignite](https://ignite.apache.org/) allows to store any type of objects. These objects can have any hierarchy. Ignite Dataset provides an ability to work with such objects. ```python >>> import tensorflow as tf @@ -81,7 +82,7 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL } } ``` - Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. + Neural network training and other computations require transformations that can be done as part of [tf.data](https://www.tensorflow.org/api_docs/python/tf/data) pipeline if you use Ignite Dataset. 
```python >>> import tensorflow as tf @@ -99,15 +100,15 @@ jdbc:ignite:thin://localhost/> INSERT INTO KITTEN_CACHE VALUES (3, 'LITTLE BALL ### Distributed Training -TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is an ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. +TensorFlow is a machine learning framework that [natively supports](https://www.tensorflow.org/deploy/distributed) distributed neural network training, inference and other computations. The main idea behind the distributed neural network training is the ability to calculate gradients of loss functions (squares of the errors) on every partition of data (in terms of horizontal partitioning) and then sum them to get loss function gradient of the whole dataset. -Utilizing this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottleneck. +Using this ability we can calculate gradients on the nodes the data is stored on, reduce them and then finally update model parameters. It allows to avoid data transfers between nodes and thus to avoid network bottlenecks. -Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL) we can specify the number of partitions the data will be partitioned on. If, for example, Apache Ignite cluster consists of 10 machines and we creates cache with 10 partitions then every machine will maintain approximately one data partition. 
+Apache Ignite uses horizontal partitioning to store data in distributed cluster. When we create Apache Ignite cache (or table in terms of SQL), we can specify the number of partitions the data will be partitioned on. For example, if an Apache Ignite cluster consists of 10 machines and we create cache with 10 partitions, then every machine will maintain approximately one data partition. -Ignite Dataset allows to utilize these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that might be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting correstondent environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach we are able to assign specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset. +Ignite Dataset allows using these two aspects of distributed neural network training (using TensorFlow) and Apache Ignite partitioning. Ignite Dataset is a computation graph operation that can be performed on a remote worker. The remote worker can override Ignite Dataset parameters (such as `host`, `port` or `part`) by setting corresponding environment variables for worker process (such as `IGNITE_DATASET_HOST`, `IGNITE_DATASET_PORT` or `IGNITE_DATASET_PART`). Using this overriding approach, we can assign a specific partition to every worker so that one worker handles one partition and, at the same time, transparently work with single dataset. ```python >>> import tensorflow as tf @@ -135,7 +136,7 @@ High-level TensorFlow API for [distributed training](https://www.tensorflow.org/ ### SSL Connection -Your data should not be accessible without any control.
Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information please see [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. +Apache Ignite allows to protect data transfer channels by [SSL](https://en.wikipedia.org/wiki/Transport_Layer_Security) and authentification. Ignite Dataset supports both SSL connection with and without authntication. For more information, please refer to the [Apache Ignite SSL/TLS](https://apacheignite.readme.io/docs/ssltls) documentation. ```python >>> import tensorflow as tf @@ -147,11 +148,11 @@ Your data should not be accessible without any control. Apache Ignite allows to ### Windows Support -Ignite Dataset is fully compatible with Windows, so you can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. +Ignite Dataset is fully compatible with Windows. You can use it as part of TensorFlow on your Windows workstation as well as on Linux/MacOS systems. ## Try it out -The simplest way to try Ignite Dataset out is to run [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and then interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). You need to start this container on your machine: +The simplest way to try Ignite Dataset is to run a [Docker](https://www.docker.com/) container with Apache Ignite and loaded [MNIST](http://yann.lecun.com/exdb/mnist/) data and after start interruct with it using Ignite Dataset. Such container is available on Docker Hub: [dmitrievanthony/ignite-with-mnist](https://hub.docker.com/r/dmitrievanthony/ignite-with-mnist/). 
You need to start this container on your machine: ``` docker run -it -p 10800:10800 dmitrievanthony/ignite-with-mnist @@ -163,4 +164,4 @@ After that you will be able to work with it following way: ## Limitations -Presently Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of object structures. +Presently, Ignite Dataset works with assumption that all objects in the cache have the same structure (homogeneous objects) and the cache contains at least one object. Another limitation concerns structured objects, Ignite Dataset does not support UUID, Maps and Object arrays that might be parts of an object structure. -- GitLab From 92019765d7b7db99d0235268d00f349b7a53d1a9 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Wed, 5 Sep 2018 14:47:20 +0000 Subject: [PATCH 031/570] Fix pylint checks, fix VS compilation issue. 
--- .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 4 ++-- .../contrib/ignite/python/ops/ignite_dataset_ops.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 7ba037f2d2..e1e2ee3b20 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock_, buf, length - recieved, 0); + int res = recv(sock_, (char*)buf, length - recieved, 0); if (res < 0) return errors::Internal("Error occured while reading from socket: ", res); @@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock_, buf, length - sent, 0); + int res = send(sock_, (char*)buf, length - sent, 0); if (res < 0) return errors::Internal("Error occured while writing into socket: ", res); diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index 6fa073957a..60003ca3b7 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -66,13 +66,13 @@ class Readable(): def __read(self, data_type, length): """Reads, unpacks and returns specified type (little-endian).""" - buffer = self.read_data(length) - return struct.unpack("<" + data_type, buffer)[0] + data_buffer = self.read_data(length) + return struct.unpack("<" + data_type, data_buffer)[0] class DataBuffer(Readable): """DataBuffer class that exposes methods to read data from a byte buffer.""" - def __init__(self, buffer): + def __init__(self, data_buffer): """Constructs a new instance of DataBuffer based on the specified 
byte buffer. @@ -80,7 +80,7 @@ class DataBuffer(Readable): buffer: Buffer to be read. """ Readable.__init__(self) - self.buffer = buffer + self.buffer = data_buffer self.ptr = 0 def read_data(self, length): -- GitLab From 0b6654bc223f4f3807209043dc34ccb07b55474e Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 11 Sep 2018 09:50:47 +0000 Subject: [PATCH 032/570] Fix code style. --- .../ignite/kernels/ignite_dataset_ops.cc | 2 +- .../kernels/ignite_plain_client_windows.cc | 4 +-- tensorflow/contrib/ignite/ops/dataset_ops.cc | 34 +++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index 89eecf9c14..d03404a460 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_dataset.h" #include +#include "ignite_dataset.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index e1e2ee3b20..8182fde6d9 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -103,7 +103,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { int recieved = 0; while (recieved < length) { - int res = recv(sock_, (char*)buf, length - recieved, 0); + int res = recv(sock_, (char *)buf, length - recieved, 0); if (res < 0) return errors::Internal("Error occured while reading from socket: ", res); @@ -121,7 +121,7 @@ Status PlainClient::WriteData(uint8_t *buf, int32_t length) { int sent = 0; while (sent < length) { - int res = send(sock_, (char*)buf, length - sent, 0); + int res = send(sock_, (char *)buf, length - sent, 0); if (res < 0) return errors::Internal("Error occured while writing into socket: ", res); diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index 17494d1cfd..fb16b290b1 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -20,23 +20,23 @@ limitations under the License. 
namespace tensorflow { REGISTER_OP("IgniteDataset") - .Input("cache_name: string") - .Input("host: string") - .Input("port: int32") - .Input("local: bool") - .Input("part: int32") - .Input("page_size: int32") - .Input("username: string") - .Input("password: string") - .Input("certfile: string") - .Input("keyfile: string") - .Input("cert_password: string") - .Input("schema: int32") - .Input("permutation: int32") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( + .Input("cache_name: string") + .Input("host: string") + .Input("port: int32") + .Input("local: bool") + .Input("part: int32") + .Input("page_size: int32") + .Input("username: string") + .Input("password: string") + .Input("certfile: string") + .Input("keyfile: string") + .Input("cert_password: string") + .Input("schema: int32") + .Input("permutation: int32") + .Output("handle: variant") + .SetIsStateful() + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( Apache Ignite is a memory-centric distributed database, caching, and processing platform for transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. This contrib package contains an -- GitLab From 5e9a9547f907599f6954fc5e28b7a78acf3b54eb Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 12 Sep 2018 11:02:12 +0800 Subject: [PATCH 033/570] Revert "Add XLA support for LeakyReluOp." This reverts commit d2ad105d2dff3c79d8f49f5fb8ce74c38f424e74. Since bfloat16 was not supported by LeakyRelu, but it should be supported in XLA Ops. 
--- tensorflow/compiler/tests/binary_ops_test.py | 8 ---- tensorflow/compiler/tests/unary_ops_test.py | 5 --- tensorflow/compiler/tf2xla/kernels/relu_op.cc | 42 ------------------- 3 files changed, 55 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index c478ff4eea..17280e445b 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -178,14 +178,6 @@ class BinaryOpsTest(xla_test.XLATestCase): [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) - self._testBinary( - gen_nn_ops.leaky_relu_grad, - np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), - np.array([-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], - dtype=dtype), - expected=np.array([0.2, 0.4, 0.6, 0.8, 1, 6, 7, 8, 9, 10], - dtype=dtype)) - self._testBinary( gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index dd29ef34ce..5b0e57f83f 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -361,11 +361,6 @@ class UnaryOpsTest(xla_test.XLATestCase): np.array([[-0.05, 6.05, 5]], dtype=dtype), expected=np.array([[0, 6, 5]], dtype=dtype)) - self._assertOpOutputMatchesExpected( - nn_ops.leaky_relu, - np.array([[-1.0, 1.0]], dtype=dtype), - expected=np.array([[-0.2, 1.0]], dtype=dtype)) - self._assertOpOutputMatchesExpected( nn_ops.softmax, np.array([1, 2, 3, 4], dtype=dtype), diff --git a/tensorflow/compiler/tf2xla/kernels/relu_op.cc b/tensorflow/compiler/tf2xla/kernels/relu_op.cc index 8d65e0339c..d35777ccb1 100644 --- a/tensorflow/compiler/tf2xla/kernels/relu_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/relu_op.cc @@ -50,23 +50,6 @@ class Relu6Op : public XlaOpKernel { } }; -class 
LeakyReluOp : public XlaOpKernel { - public: - explicit LeakyReluOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Compute the max of the input x and alpha*x. - void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* builder = ctx->builder(); - auto alpha = XlaHelpers::FloatLiteral(builder, input_type(0), - static_cast(alpha_)); - ctx->SetOutput(0, xla::Max(xla::Mul(alpha, ctx->Input(0)), ctx->Input(0))); - } - - private: - float alpha_; -}; - class ReluGradOp : public XlaOpKernel { public: explicit ReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} @@ -101,35 +84,10 @@ class Relu6GradOp : public XlaOpKernel { } }; -class LeakyReluGradOp : public XlaOpKernel { - public: - explicit LeakyReluGradOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &alpha_)); - } - // Return the lhs (incoming gradient) if the rhs (input feature) > 0, - // otherwise return the alpha * lhs. 
- void Compile(XlaOpKernelContext* ctx) override { - xla::XlaBuilder* b = ctx->builder(); - const TensorShape shape = ctx->InputShape(0); - const auto zero = - xla::Broadcast(XlaHelpers::Zero(b, input_type(0)), shape.dim_sizes()); - const auto pred = xla::Gt(ctx->Input(1), zero); - auto alpha = - XlaHelpers::FloatLiteral(b, input_type(0), static_cast(alpha_)); - ctx->SetOutput( - 0, xla::Select(pred, ctx->Input(0), xla::Mul(alpha, ctx->Input(0)))); - } - - private: - float alpha_; -}; - REGISTER_XLA_OP(Name("Relu"), ReluOp); REGISTER_XLA_OP(Name("Relu6"), Relu6Op); -REGISTER_XLA_OP(Name("LeakyRelu"), LeakyReluOp); REGISTER_XLA_OP(Name("ReluGrad"), ReluGradOp); REGISTER_XLA_OP(Name("Relu6Grad"), Relu6GradOp); -REGISTER_XLA_OP(Name("LeakyReluGrad"), LeakyReluGradOp); } // namespace } // namespace tensorflow -- GitLab From 9ec9c8b24cca5f1e746fef8cd351b3cae6d5a740 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Wed, 12 Sep 2018 20:42:01 +0300 Subject: [PATCH 034/570] Fixes after second review. 
--- tensorflow/contrib/ignite/BUILD | 1 + tensorflow/contrib/ignite/__init__.py | 22 +- .../kernels/ignite_binary_object_parser.cc | 404 ++++++++++-------- .../kernels/ignite_binary_object_parser.h | 36 +- .../contrib/ignite/kernels/ignite_client.h | 55 ++- .../contrib/ignite/kernels/ignite_dataset.cc | 99 ++--- .../contrib/ignite/kernels/ignite_dataset.h | 37 +- .../ignite/kernels/ignite_dataset_iterator.cc | 383 ++++++++--------- .../ignite/kernels/ignite_dataset_iterator.h | 74 ++-- .../ignite/kernels/ignite_dataset_ops.cc | 123 ++++-- .../ignite/kernels/ignite_plain_client.h | 15 +- .../kernels/ignite_plain_client_unix.cc | 14 +- .../kernels/ignite_plain_client_windows.cc | 17 +- .../ignite/kernels/ignite_ssl_wrapper.cc | 34 +- .../ignite/kernels/ignite_ssl_wrapper.h | 26 +- tensorflow/contrib/ignite/ops/dataset_ops.cc | 2 + .../ignite/python/ops/ignite_dataset_ops.py | 176 ++++---- 17 files changed, 848 insertions(+), 670 deletions(-) diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index b7d40a99f7..2f598b4aed 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -40,6 +40,7 @@ cc_library( srcs = [ "kernels/ignite_dataset_ops.cc", "kernels/ignite_client.h", + "kernels/ignite_byte_swapper.h", "kernels/ignite_plain_client.h", "kernels/ignite_ssl_wrapper.h", "kernels/ignite_ssl_wrapper.cc", diff --git a/tensorflow/contrib/ignite/__init__.py b/tensorflow/contrib/ignite/__init__.py index b78829d0f4..f42947696f 100644 --- a/tensorflow/contrib/ignite/__init__.py +++ b/tensorflow/contrib/ignite/__init__.py @@ -12,16 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Apache Ignite is a memory-centric distributed database, caching, and - processing platform for transactional, analytical, and streaming workloads, - delivering in-memory speeds at petabyte scale. 
This contrib package - contains an integration between Apache Ignite and TensorFlow. The - integration is based on tf.data from TensorFlow side and Binary Client - Protocol from Apache Ignite side. It allows to use Apache Ignite as a - datasource for neural network training, inference and all other - computations supported by TensorFlow. Ignite Dataset is based on Apache - Ignite Binary Client Protocol: - https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. +"""IgniteDataset that allows to get data from Apache Ignite. + +Apache Ignite is a memory-centric distributed database, caching, and +processing platform for transactional, analytical, and streaming workloads, +delivering in-memory speeds at petabyte scale. This contrib package +contains an integration between Apache Ignite and TensorFlow. The +integration is based on tf.data from TensorFlow side and Binary Client +Protocol from Apache Ignite side. It allows to use Apache Ignite as a +datasource for neural network training, inference and all other +computations supported by TensorFlow. Ignite Dataset is based on Apache +Ignite Binary Client Protocol: +https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. @@IgniteDataset """ diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc index 9bf4480d2d..2c8a7d44b0 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.cc @@ -13,242 +13,171 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" namespace tensorflow { +BinaryObjectParser::BinaryObjectParser() : byte_swapper_(ByteSwapper(false)) {} + Status BinaryObjectParser::Parse(uint8_t** ptr, std::vector* out_tensors, - std::vector* types) { - uint8_t object_type_id = **ptr; - *ptr += 1; + std::vector* types) const { + uint8_t object_type_id = ParseByte(ptr); + + // Skip non-leaf nodes. + if (object_type_id != WRAPPED_OBJ && object_type_id != COMPLEX_OBJ) + types->push_back(object_type_id); switch (object_type_id) { case BYTE: { - Tensor tensor(cpu_allocator(), DT_UINT8, {}); - tensor.scalar()() = *((uint8_t*)*ptr); - *ptr += 1; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, TensorShape({})); + out_tensors->back().scalar()() = ParseByte(ptr); break; } case SHORT: { - Tensor tensor(cpu_allocator(), DT_INT16, {}); - tensor.scalar()() = *((int16_t*)*ptr); - *ptr += 2; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_INT16, TensorShape({})); + out_tensors->back().scalar()() = ParseShort(ptr); + break; + } + case USHORT: { + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, TensorShape({})); + out_tensors->back().scalar()() = ParseUnsignedShort(ptr); break; } case INT: { - Tensor tensor(cpu_allocator(), DT_INT32, {}); - tensor.scalar()() = *((int32_t*)*ptr); - *ptr += 4; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_INT32, TensorShape({})); + out_tensors->back().scalar()() = ParseInt(ptr); break; } case LONG: { - Tensor tensor(cpu_allocator(), DT_INT64, {}); - tensor.scalar()() = *((int64_t*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); + 
out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); break; } case FLOAT: { - Tensor tensor(cpu_allocator(), DT_FLOAT, {}); - tensor.scalar()() = *((float*)*ptr); - *ptr += 4; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, TensorShape({})); + out_tensors->back().scalar()() = ParseFloat(ptr); break; } case DOUBLE: { - Tensor tensor(cpu_allocator(), DT_DOUBLE, {}); - tensor.scalar()() = *((double*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); - break; - } - case UCHAR: { - Tensor tensor(cpu_allocator(), DT_UINT16, {}); - tensor.scalar()() = *((uint16_t*)*ptr); - *ptr += 2; - out_tensors->push_back(std::move(tensor)); + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, TensorShape({})); + out_tensors->back().scalar()() = ParseDouble(ptr); break; } case BOOL: { - Tensor tensor(cpu_allocator(), DT_BOOL, {}); - tensor.scalar()() = *((bool*)*ptr); - *ptr += 1; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, TensorShape({})); + out_tensors->back().scalar()() = ParseBool(ptr); break; } case STRING: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_STRING, {}); - tensor.scalar()() = std::string((char*)*ptr, length); - *ptr += length; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_STRING, TensorShape({})); + out_tensors->back().scalar()() = ParseString(ptr); break; } case DATE: { - Tensor tensor(cpu_allocator(), DT_INT64, {}); - tensor.scalar()() = *((int64_t*)*ptr); - *ptr += 8; - out_tensors->push_back(std::move(tensor)); - + out_tensors->emplace_back(cpu_allocator(), DT_INT64, TensorShape({})); + out_tensors->back().scalar()() = ParseLong(ptr); break; } case BYTE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_UINT8, TensorShape({length})); - - 
uint8_t* arr = (uint8_t*)*ptr; - *ptr += length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + uint8_t* arr = ParseByteArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT8, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case SHORT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT16, TensorShape({length})); - - int16_t* arr = (int16_t*)*ptr; - *ptr += length * 2; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int16_t* arr = ParseShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); + break; + } + case USHORT_ARR: { + int32_t length = ParseInt(ptr); + uint16_t* arr = ParseUnsignedShortArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_UINT16, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case INT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT32, TensorShape({length})); - - int32_t* arr = (int32_t*)*ptr; - *ptr += length * 4; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int32_t* arr = ParseIntArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT32, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case LONG_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - - int64_t* arr = (int64_t*)*ptr; - *ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = 
ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case FLOAT_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_FLOAT, TensorShape({length})); - - float* arr = (float*)*ptr; - *ptr += 4 * length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + float* arr = ParseFloatArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_FLOAT, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case DOUBLE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_DOUBLE, TensorShape({length})); - - double* arr = (double*)*ptr; - *ptr += 8 * length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); - break; - } - case UCHAR_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_UINT16, TensorShape({length})); - - uint16_t* arr = (uint16_t*)*ptr; - *ptr += length * 2; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + double* arr = ParseDoubleArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_DOUBLE, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case BOOL_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_BOOL, TensorShape({length})); - - bool* arr = (bool*)*ptr; - *ptr += length; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + bool* arr = ParseBoolArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_BOOL, + TensorShape({length})); + 
std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case STRING_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_STRING, TensorShape({length})); - - for (int32_t i = 0; i < length; i++) { - int32_t str_length = *((int32_t*)*ptr); - *ptr += 4; - const int8_t* str = (const int8_t*)*ptr; - *ptr += str_length; - tensor.vec()(i) = std::string((char*)str, str_length); - } - - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + out_tensors->emplace_back(cpu_allocator(), DT_STRING, + TensorShape({length})); + for (int32_t i = 0; i < length; i++) + out_tensors->back().vec()(i) = ParseString(ptr); break; } case DATE_ARR: { - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - Tensor tensor(cpu_allocator(), DT_INT64, TensorShape({length})); - int64_t* arr = (int64_t*)*ptr; - *ptr += length * 8; - - std::copy_n(arr, length, tensor.flat().data()); - out_tensors->push_back(std::move(tensor)); + int32_t length = ParseInt(ptr); + int64_t* arr = ParseLongArr(ptr, length); + out_tensors->emplace_back(cpu_allocator(), DT_INT64, + TensorShape({length})); + std::copy_n(arr, length, out_tensors->back().flat().data()); break; } case WRAPPED_OBJ: { - int32_t byte_arr_size = *((int32_t*)*ptr); - *ptr += 4; - + int32_t byte_arr_size = ParseInt(ptr); TF_RETURN_IF_ERROR(Parse(ptr, out_tensors, types)); - - int32_t offset = *((int32_t*)*ptr); - *ptr += 4; + int32_t offset = ParseInt(ptr); break; } case COMPLEX_OBJ: { - uint8_t version = **ptr; - *ptr += 1; - int16_t flags = *((int16_t*)*ptr); // USER_TYPE = 1, HAS_SCHEMA = 2 - *ptr += 2; - int32_t type_id = *((int32_t*)*ptr); - *ptr += 4; - int32_t hash_code = *((int32_t*)*ptr); - *ptr += 4; - int32_t length = *((int32_t*)*ptr); - *ptr += 4; - int32_t schema_id = *((int32_t*)*ptr); - *ptr += 4; - int32_t schema_offset = *((int32_t*)*ptr); - *ptr += 4; - + uint8_t version = ParseByte(ptr); + int16_t flags = ParseShort(ptr); + int32_t type_id = 
ParseInt(ptr); + int32_t hash_code = ParseInt(ptr); + int32_t length = ParseInt(ptr); + int32_t schema_id = ParseInt(ptr); + int32_t schema_offset = ParseInt(ptr); + + // 24 is size of header just read. uint8_t* end = *ptr + schema_offset - 24; int32_t i = 0; while (*ptr < end) { @@ -261,12 +190,145 @@ Status BinaryObjectParser::Parse(uint8_t** ptr, break; } default: { - return errors::Internal("Unknowd binary type (type id ", - (int)object_type_id, ")"); + return errors::Unknown("Unknowd binary type (type id ", + (int)object_type_id, ")"); } } return Status::OK(); } +uint8_t BinaryObjectParser::ParseByte(uint8_t** ptr) const { + uint8_t res = **ptr; + *ptr += 1; + + return res; +} + +int16_t BinaryObjectParser::ParseShort(uint8_t** ptr) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16(res); + *ptr += 2; + + return *res; +} + +uint16_t BinaryObjectParser::ParseUnsignedShort(uint8_t** ptr) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16(res); + *ptr += 2; + + return *res; +} + +int32_t BinaryObjectParser::ParseInt(uint8_t** ptr) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32(res); + *ptr += 4; + + return *res; +} + +int64_t BinaryObjectParser::ParseLong(uint8_t** ptr) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64(res); + *ptr += 8; + + return *res; +} + +float BinaryObjectParser::ParseFloat(uint8_t** ptr) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloat(res); + *ptr += 4; + + return *res; +} + +double BinaryObjectParser::ParseDouble(uint8_t** ptr) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDouble(res); + *ptr += 8; + + return *res; +} + +bool BinaryObjectParser::ParseBool(uint8_t** ptr) const { + bool res = **reinterpret_cast(ptr); + *ptr += 1; + + return res; +} + +string BinaryObjectParser::ParseString(uint8_t** ptr) const { + int32_t 
length = ParseInt(ptr); + string res(*reinterpret_cast(ptr), length); + *ptr += length; + + return res; +} + +uint8_t* BinaryObjectParser::ParseByteArr(uint8_t** ptr, int length) const { + uint8_t* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + +int16_t* BinaryObjectParser::ParseShortArr(uint8_t** ptr, int length) const { + int16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +uint16_t* BinaryObjectParser::ParseUnsignedShortArr(uint8_t** ptr, + int length) const { + uint16_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredUnsignedInt16Arr(res, length); + *ptr += length * 2; + + return res; +} + +int32_t* BinaryObjectParser::ParseIntArr(uint8_t** ptr, int length) const { + int32_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt32Arr(res, length); + *ptr += length * 4; + + return res; +} + +int64_t* BinaryObjectParser::ParseLongArr(uint8_t** ptr, int length) const { + int64_t* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredInt64Arr(res, length); + *ptr += length * 8; + + return res; +} + +float* BinaryObjectParser::ParseFloatArr(uint8_t** ptr, int length) const { + float* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredFloatArr(res, length); + *ptr += length * 4; + + return res; +} + +double* BinaryObjectParser::ParseDoubleArr(uint8_t** ptr, int length) const { + double* res = *reinterpret_cast(ptr); + byte_swapper_.SwapIfRequiredDoubleArr(res, length); + *ptr += length * 8; + + return res; +} + +bool* BinaryObjectParser::ParseBoolArr(uint8_t** ptr, int length) const { + bool* res = *reinterpret_cast(ptr); + *ptr += length; + + return res; +} + } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h index 9accbd796f..eb1f856643 100644 --- 
a/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h +++ b/tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h @@ -13,16 +13,42 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ + #include -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { class BinaryObjectParser { public: + BinaryObjectParser(); Status Parse(uint8_t** ptr, std::vector* out_tensors, - std::vector* types); + std::vector* types) const; + + private: + uint8_t ParseByte(uint8_t** ptr) const; + int16_t ParseShort(uint8_t** ptr) const; + uint16_t ParseUnsignedShort(uint8_t** ptr) const; + int32_t ParseInt(uint8_t** ptr) const; + int64_t ParseLong(uint8_t** ptr) const; + float ParseFloat(uint8_t** ptr) const; + double ParseDouble(uint8_t** ptr) const; + bool ParseBool(uint8_t** ptr) const; + string ParseString(uint8_t** ptr) const; + uint8_t* ParseByteArr(uint8_t** ptr, int length) const; + int16_t* ParseShortArr(uint8_t** ptr, int length) const; + uint16_t* ParseUnsignedShortArr(uint8_t** ptr, int length) const; + int32_t* ParseIntArr(uint8_t** ptr, int length) const; + int64_t* ParseLongArr(uint8_t** ptr, int length) const; + float* ParseFloatArr(uint8_t** ptr, int length) const; + double* ParseDoubleArr(uint8_t** ptr, int length) const; + bool* ParseBoolArr(uint8_t** ptr, int length) const; + + const ByteSwapper byte_swapper_; }; enum ObjectType { @@ -32,7 +58,7 @@ enum ObjectType { LONG = 4, FLOAT = 5, DOUBLE = 6, - UCHAR = 7, + USHORT = 7, BOOL = 8, STRING = 9, DATE = 11, @@ -42,7 +68,7 @@ enum ObjectType { LONG_ARR = 15, 
FLOAT_ARR = 16, DOUBLE_ARR = 17, - UCHAR_ARR = 18, + USHORT_ARR = 18, BOOL_ARR = 19, STRING_ARR = 20, DATE_ARR = 22, @@ -51,3 +77,5 @@ enum ObjectType { }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BINARY_OBJECT_PARSER_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_client.h b/tensorflow/contrib/ignite/kernels/ignite_client.h index 944b3fe184..508b6e4a60 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_client.h @@ -16,40 +16,69 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ #define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ +#include "tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" namespace tensorflow { class Client { public: + Client(bool big_endian) : byte_swapper_(ByteSwapper(big_endian)){}; virtual Status Connect() = 0; virtual Status Disconnect() = 0; virtual bool IsConnected() = 0; virtual int GetSocketDescriptor() = 0; - virtual Status ReadData(uint8_t* buf, int32_t length) = 0; - virtual Status WriteData(uint8_t* buf, int32_t length) = 0; + virtual Status ReadData(uint8_t *buf, const int32_t length) = 0; + virtual Status WriteData(const uint8_t *buf, const int32_t length) = 0; - inline Status ReadByte(uint8_t* data) { return ReadData(data, 1); } + inline Status ReadByte(uint8_t *data) { return ReadData(data, 1); } - inline Status ReadShort(int16_t* data) { return ReadData((uint8_t*)data, 2); } + inline Status ReadShort(int16_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 2)); + byte_swapper_.SwapIfRequiredInt16(data); - inline Status ReadInt(int32_t* data) { return ReadData((uint8_t*)data, 4); } + return Status::OK(); + } + + inline Status ReadInt(int32_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 4)); + byte_swapper_.SwapIfRequiredInt32(data); + + return Status::OK(); + } - 
inline Status ReadLong(int64_t* data) { return ReadData((uint8_t*)data, 8); } + inline Status ReadLong(int64_t *data) { + TF_RETURN_IF_ERROR(ReadData((uint8_t *)data, 8)); + byte_swapper_.SwapIfRequiredInt64(data); - inline Status WriteByte(uint8_t data) { return WriteData(&data, 1); } + return Status::OK(); + } + + inline Status WriteByte(const uint8_t data) { return WriteData(&data, 1); } - inline Status WriteShort(int16_t data) { - return WriteData((uint8_t*)&data, 2); + inline Status WriteShort(const int16_t data) { + int16_t tmp = data; + byte_swapper_.SwapIfRequiredInt16(&tmp); + return WriteData((uint8_t *)&tmp, 2); } - inline Status WriteInt(int32_t data) { return WriteData((uint8_t*)&data, 4); } + inline Status WriteInt(const int32_t data) { + int32_t tmp = data; + byte_swapper_.SwapIfRequiredInt32(&tmp); + return WriteData((uint8_t *)&tmp, 4); + } - inline Status WriteLong(int64_t data) { - return WriteData((uint8_t*)&data, 8); + inline Status WriteLong(const int64_t data) { + int64_t tmp = data; + byte_swapper_.SwapIfRequiredInt64(&tmp); + return WriteData((uint8_t *)&tmp, 8); } + + private: + const ByteSwapper byte_swapper_; }; } // namespace tensorflow -#endif +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc index f25f8a5b18..c4a7d3c513 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.cc @@ -13,40 +13,41 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_dataset_iterator.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { -IgniteDataset::IgniteDataset(OpKernelContext* ctx, std::string cache_name, - std::string host, int32 port, bool local, - int32 part, int32 page_size, std::string username, - std::string password, std::string certfile, - std::string keyfile, std::string cert_password, - std::vector schema, - std::vector permutation) +IgniteDataset::IgniteDataset(OpKernelContext* ctx, string cache_name, + string host, int32 port, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, + string cert_password, std::vector schema, + std::vector permutation, + DataTypeVector dtypes, + std::vector shapes) : DatasetBase(DatasetContext(ctx)), - cache_name_(cache_name), - host_(host), + cache_name_(std::move(cache_name)), + host_(std::move(host)), port_(port), local_(local), part_(part), page_size_(page_size), - username_(username), - password_(password), - certfile_(certfile), - keyfile_(keyfile), - cert_password_(cert_password), - schema_(schema), - permutation_(permutation) { - SchemaToTypes(); - SchemaToShapes(); - - LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name - << "', host='" << host << "', port=" << port << ", local=" << local - << ", part=" << part << ", page_size=" << page_size - << ", username='" << username << "', certfile='" << certfile - << "', keyfile='" << keyfile + "']"; + username_(std::move(username)), + password_(std::move(password)), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), + dtypes_(dtypes), + shapes_(shapes) { + LOG(INFO) << "Ignite Dataset created [cache_name='" << cache_name_ + << "', host='" << 
host_ << "', port=" << port_ + << ", local=" << local_ << ", part=" << part_ + << ", page_size=" << page_size_ << ", username='" << username_ + << "', certfile='" << certfile_ << "', keyfile='" + << keyfile_ + "']"; } IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } @@ -54,10 +55,12 @@ IgniteDataset::~IgniteDataset() { LOG(INFO) << "Ignite Dataset destroyed"; } std::unique_ptr IgniteDataset::MakeIteratorInternal( const string& prefix) const { return std::unique_ptr(new IgniteDatasetIterator( - {this, strings::StrCat(prefix, "::Ignite")}, this->host_, this->port_, - this->cache_name_, this->local_, this->part_, this->page_size_, - this->username_, this->password_, this->certfile_, this->keyfile_, - this->cert_password_, this->schema_, this->permutation_)); + {this, strings::StrCat(prefix, "::Ignite")}, std::move(this->host_), + this->port_, std::move(this->cache_name_), this->local_, this->part_, + this->page_size_, std::move(this->username_), std::move(this->password_), + std::move(this->certfile_), std::move(this->keyfile_), + std::move(this->cert_password_), std::move(this->schema_), + std::move(this->permutation_))); } const DataTypeVector& IgniteDataset::output_dtypes() const { return dtypes_; } @@ -75,42 +78,4 @@ Status IgniteDataset::AsGraphDefInternal(SerializationContext* ctx, "IgniteDataset does not support 'AsGraphDefInternal'"); } -void IgniteDataset::SchemaToTypes() { - for (auto e : schema_) { - if (e == BYTE || e == BYTE_ARR) { - dtypes_.push_back(DT_UINT8); - } else if (e == SHORT || e == SHORT_ARR) { - dtypes_.push_back(DT_INT16); - } else if (e == INT || e == INT_ARR) { - dtypes_.push_back(DT_INT32); - } else if (e == LONG || e == LONG_ARR) { - dtypes_.push_back(DT_INT64); - } else if (e == FLOAT || e == FLOAT_ARR) { - dtypes_.push_back(DT_FLOAT); - } else if (e == DOUBLE || e == DOUBLE_ARR) { - dtypes_.push_back(DT_DOUBLE); - } else if (e == UCHAR || e == UCHAR_ARR) { - dtypes_.push_back(DT_UINT8); - } else if (e == 
BOOL || e == BOOL_ARR) { - dtypes_.push_back(DT_BOOL); - } else if (e == STRING || e == STRING_ARR) { - dtypes_.push_back(DT_STRING); - } else { - LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; - } - } -} - -void IgniteDataset::SchemaToShapes() { - for (auto e : schema_) { - if (e >= 1 && e < 10) { - shapes_.push_back(PartialTensorShape({})); - } else if (e >= 12 && e < 21) { - shapes_.push_back(PartialTensorShape({-1})); - } else { - LOG(ERROR) << "Unexpected type in schema [type_id=" << e << "]"; - } - } -} - } // namespace tensorflow diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset.h b/tensorflow/contrib/ignite/kernels/ignite_dataset.h index d3fec5910b..66bfdf2e2a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset.h @@ -13,18 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ + #include "tensorflow/core/framework/dataset.h" namespace tensorflow { class IgniteDataset : public DatasetBase { public: - IgniteDataset(OpKernelContext* ctx, std::string cache_name, std::string host, + IgniteDataset(OpKernelContext* ctx, string cache_name, string host, int32 port, bool local, int32 part, int32 page_size, - std::string username, std::string password, - std::string certfile, std::string keyfile, - std::string cert_password, std::vector schema, - std::vector permutation); + string username, string password, string certfile, + string keyfile, string cert_password, std::vector schema, + std::vector permutation, DataTypeVector dtypes, + std::vector shapes); ~IgniteDataset(); std::unique_ptr MakeIteratorInternal( const string& prefix) const override; @@ -38,25 +41,23 @@ class IgniteDataset : public DatasetBase { Node** 
output) const override; private: - const std::string cache_name_; - const std::string host_; + const string cache_name_; + const string host_; const int32 port_; const bool local_; const int32 part_; const int32 page_size_; - const std::string username_; - const std::string password_; - const std::string certfile_; - const std::string keyfile_; - const std::string cert_password_; + const string username_; + const string password_; + const string certfile_; + const string keyfile_; + const string cert_password_; const std::vector schema_; const std::vector permutation_; - - DataTypeVector dtypes_; - std::vector shapes_; - - void SchemaToTypes(); - void SchemaToShapes(); + const DataTypeVector dtypes_; + const std::vector shapes_; }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc index 1774585ecd..f68ded5a3a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_dataset_iterator.h" - -#include "ignite_plain_client.h" -#include "ignite_ssl_wrapper.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/logging.h" #include @@ -25,30 +25,31 @@ limitations under the License. 
namespace tensorflow { IgniteDatasetIterator::IgniteDatasetIterator( - const Params& params, std::string host, int32 port, std::string cache_name, - bool local, int32 part, int32 page_size, std::string username, - std::string password, std::string certfile, std::string keyfile, - std::string cert_password, std::vector schema, - std::vector permutation) + const Params& params, string host, int32 port, string cache_name, + bool local, int32 part, int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, + std::vector schema, std::vector permutation) : DatasetIterator(params), - cache_name_(cache_name), + cache_name_(std::move(cache_name)), local_(local), part_(part), page_size_(page_size), - username_(username), - password_(password), - schema_(schema), - permutation_(permutation), + username_(std::move(username)), + password_(std::move(password)), + schema_(std::move(schema)), + permutation_(std::move(permutation)), remainder_(-1), cursor_id_(-1), - last_page_(false) { - Client* p_client = new PlainClient(host, port); + last_page_(false), + valid_state_(true) { + Client* p_client = new PlainClient(std::move(host), port, false); if (certfile.empty()) client_ = std::unique_ptr(p_client); else - client_ = std::unique_ptr(new SslWrapper( - std::unique_ptr(p_client), certfile, keyfile, cert_password)); + client_ = std::unique_ptr( + new SslWrapper(std::unique_ptr(p_client), std::move(certfile), + std::move(keyfile), std::move(cert_password), false)); LOG(INFO) << "Ignite Dataset Iterator created"; } @@ -60,12 +61,80 @@ IgniteDatasetIterator::~IgniteDatasetIterator() { LOG(INFO) << "Ignite Dataset Iterator destroyed"; } +Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) { + mutex_lock l(mutex_); + + if (valid_state_) { + Status status = + GetNextInternalWithValidState(ctx, out_tensors, end_of_sequence); + + if (!status.ok()) valid_state_ = false; + + 
return status; + } + + return errors::Unknown("Iterator is invalid"); +} + +Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'SaveInternal'"); +} + +Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) { + return errors::Unimplemented( + "Iterator for IgniteDataset does not support 'RestoreInternal')"); +} + +Status IgniteDatasetIterator::GetNextInternalWithValidState( + IteratorContext* ctx, std::vector* out_tensors, + bool* end_of_sequence) { + if (remainder_ == 0 && last_page_) { + cursor_id_ = -1; + *end_of_sequence = true; + + return Status::OK(); + } else { + TF_RETURN_IF_ERROR(EstablishConnection()); + + if (remainder_ == -1) { + TF_RETURN_IF_ERROR(ScanQuery()); + } else if (remainder_ == 0) { + TF_RETURN_IF_ERROR(LoadNextPage()); + } + + uint8_t* initial_ptr = ptr_; + std::vector tensors; + std::vector types; + + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse key + TF_RETURN_IF_ERROR(parser_.Parse(&ptr_, &tensors, &types)); // Parse val + + remainder_ -= (ptr_ - initial_ptr); + + TF_RETURN_IF_ERROR(CheckTypes(types)); + + for (size_t i = 0; i < tensors.size(); i++) + out_tensors->push_back(tensors[permutation_[i]]); + + *end_of_sequence = false; + + return Status::OK(); + } + + *end_of_sequence = true; + + return Status::OK(); +} + Status IgniteDatasetIterator::EstablishConnection() { if (!client_->IsConnected()) { - Status status = client_->Connect(); - if (!status.ok()) return status; + TF_RETURN_IF_ERROR(client_->Connect()); - status = Handshake(); + Status status = Handshake(); if (!status.ok()) { Status disconnect_status = client_->Disconnect(); if (!disconnect_status.ok()) LOG(ERROR) << disconnect_status.ToString(); @@ -79,19 +148,17 @@ Status IgniteDatasetIterator::EstablishConnection() { Status IgniteDatasetIterator::CloseConnection() { if (cursor_id_ != -1 && !last_page_) { - 
Status conn_status = EstablishConnection(); - if (!conn_status.ok()) return conn_status; + TF_RETURN_IF_ERROR(EstablishConnection()); - TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length - TF_RETURN_IF_ERROR( - client_->WriteShort(close_connection_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteInt(kCloseConnectionReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kCloseConnectionOpcode)); TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Resource ID int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); - if (res_len < 12) - return errors::Internal("Close Resource Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Close Resource Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -100,22 +167,21 @@ Status IgniteDatasetIterator::CloseConnection() { if (status != 0) { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); + uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Close Resource Error [status=", status, - ", message=", err_msg, "]"); + return errors::Unknown("Close Resource Error [status=", status, + ", message=", err_msg, "]"); } - return errors::Internal("Close Resource Error [status=", status, "]"); + return errors::Unknown("Close Resource Error [status=", status, "]"); } - LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - cursor_id_ = -1; return client_->Disconnect(); @@ -126,94 +192,43 @@ Status 
IgniteDatasetIterator::CloseConnection() { return client_->IsConnected() ? client_->Disconnect() : Status::OK(); } -Status IgniteDatasetIterator::GetNextInternal(IteratorContext* ctx, - std::vector* out_tensors, - bool* end_of_sequence) { - if (remainder_ == 0 && last_page_) { - LOG(INFO) << "Query Cursor " << cursor_id_ << " is closed"; - - cursor_id_ = -1; - *end_of_sequence = true; - return Status::OK(); - } else { - Status status = EstablishConnection(); - if (!status.ok()) return status; - - if (remainder_ == -1 || remainder_ == 0) { - Status status = remainder_ == -1 ? ScanQuery() : LoadNextPage(); - if (!status.ok()) return status; - } - - uint8_t* initial_ptr = ptr_; - std::vector types; - std::vector tensors; - - status = parser_.Parse(&ptr_, &tensors, &types); // Parse key - if (!status.ok()) return status; - - status = parser_.Parse(&ptr_, &tensors, &types); // Parse val - if (!status.ok()) return status; - - remainder_ -= (ptr_ - initial_ptr); - - out_tensors->resize(tensors.size()); - for (int32_t i = 0; i < tensors.size(); i++) - (*out_tensors)[permutation_[i]] = std::move(tensors[i]); - - *end_of_sequence = false; - return Status::OK(); - } - - *end_of_sequence = true; - return Status::OK(); -} - -Status IgniteDatasetIterator::SaveInternal(IteratorStateWriter* writer) { - return errors::Unimplemented( - "Iterator for IgniteDataset does not support 'SaveInternal'"); -} - -Status IgniteDatasetIterator::RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) { - return errors::Unimplemented( - "Iterator for IgniteDataset does not support 'RestoreInternal')"); -} - Status IgniteDatasetIterator::Handshake() { - int32_t msg_len = 8; + int32_t msg_len = kHandshakeReqDefaultLength; if (username_.empty()) msg_len += 1; else - msg_len += 5 + username_.length(); + msg_len += 5 + username_.length(); // 1 byte header, 4 bytes length. 
if (password_.empty()) msg_len += 1; else - msg_len += 5 + password_.length(); + msg_len += 5 + password_.length(); // 1 byte header, 4 bytes length. TF_RETURN_IF_ERROR(client_->WriteInt(msg_len)); TF_RETURN_IF_ERROR(client_->WriteByte(1)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_major_version)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_minor_version)); - TF_RETURN_IF_ERROR(client_->WriteShort(protocol_patch_version)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMajorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolMinorVersion)); + TF_RETURN_IF_ERROR(client_->WriteShort(kProtocolPatchVersion)); TF_RETURN_IF_ERROR(client_->WriteByte(2)); if (username_.empty()) { - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); } else { - TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); TF_RETURN_IF_ERROR(client_->WriteInt(username_.length())); TF_RETURN_IF_ERROR( - client_->WriteData((uint8_t*)username_.c_str(), username_.length())); + client_->WriteData(reinterpret_cast(username_.c_str()), + username_.length())); } if (password_.empty()) { - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); } else { - TF_RETURN_IF_ERROR(client_->WriteByte(string_val)); + TF_RETURN_IF_ERROR(client_->WriteByte(kStringVal)); TF_RETURN_IF_ERROR(client_->WriteInt(password_.length())); TF_RETURN_IF_ERROR( - client_->WriteData((uint8_t*)password_.c_str(), password_.length())); + client_->WriteData(reinterpret_cast(password_.c_str()), + password_.length())); } int32_t handshake_res_len; @@ -221,9 +236,6 @@ Status IgniteDatasetIterator::Handshake() { uint8_t handshake_res; TF_RETURN_IF_ERROR(client_->ReadByte(&handshake_res)); - LOG(INFO) << "Handshake length " << handshake_res_len << ", res " - << (int16_t)handshake_res; - if (handshake_res != 1) { int16_t serv_ver_major; 
TF_RETURN_IF_ERROR(client_->ReadShort(&serv_ver_major)); @@ -234,26 +246,26 @@ Status IgniteDatasetIterator::Handshake() { uint8_t header; TF_RETURN_IF_ERROR(client_->ReadByte(&header)); - if (header == string_val) { + if (header == kStringVal) { int32_t length; TF_RETURN_IF_ERROR(client_->ReadInt(&length)); + uint8_t* err_msg_c = new uint8_t[length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, length)); - std::string err_msg((char*)err_msg_c, length); - delete[] err_msg_c; - - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, ", message='", err_msg, - "']"); - } else if (header == null_val) { - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, "]"); + string err_msg(reinterpret_cast(err_msg_c), length); + + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, ", message='", err_msg, "']"); + } else if (header == kNullVal) { + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } else { - return errors::Internal("Handshake Error [result=", handshake_res, - ", version=", serv_ver_major, ".", serv_ver_minor, - ".", serv_ver_patch, "]"); + return errors::Unknown("Handshake Error [result=", handshake_res, + ", version=", serv_ver_major, ".", serv_ver_minor, + ".", serv_ver_patch, "]"); } } @@ -261,31 +273,26 @@ Status IgniteDatasetIterator::Handshake() { } Status IgniteDatasetIterator::ScanQuery() { - TF_RETURN_IF_ERROR(client_->WriteInt(25)); // Message length - TF_RETURN_IF_ERROR(client_->WriteShort(scan_query_opcode)); // Operation code - TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID + 
TF_RETURN_IF_ERROR(client_->WriteInt(kScanQueryReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kScanQueryOpcode)); + TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR( client_->WriteInt(JavaHashCode(cache_name_))); // Cache name TF_RETURN_IF_ERROR(client_->WriteByte(0)); // Flags - TF_RETURN_IF_ERROR(client_->WriteByte(null_val)); // Filter object + TF_RETURN_IF_ERROR(client_->WriteByte(kNullVal)); // Filter object TF_RETURN_IF_ERROR(client_->WriteInt(page_size_)); // Cursor page size TF_RETURN_IF_ERROR(client_->WriteInt(part_)); // part_ition to query TF_RETURN_IF_ERROR(client_->WriteByte(local_)); // local_ flag - int64_t wait_start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 wait_start = Env::Default()->NowMicros(); int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + int64_t wait_stop = Env::Default()->NowMicros(); - int64_t wait_stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) / 1000 << " ms"; - LOG(INFO) << "Scan Query waited " << (wait_stop - wait_start) << " ms"; - - if (res_len < 12) return errors::Internal("Scan Query Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Scan Query Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -297,78 +304,47 @@ Status IgniteDatasetIterator::ScanQuery() { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - 
delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Scan Query Error [status=", status, ", message=", - err_msg, "]"); + return errors::Unknown("Scan Query Error [status=", status, ", message=", + err_msg, "]"); } - return errors::Internal("Scan Query Error [status=", status, "]"); + return errors::Unknown("Scan Query Error [status=", status, "]"); } TF_RETURN_IF_ERROR(client_->ReadLong(&cursor_id_)); - LOG(INFO) << "Query Cursor " << cursor_id_ << " is opened"; - int32_t row_cnt; TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder_ = res_len - 25; - page_ = std::unique_ptr(new uint8_t[remainder_]); - ptr_ = page_.get(); - - int64_t start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - - TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); - - int64_t stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - ; - - double size_in_mb = 1.0 * remainder_ / 1024 / 1024; - double time_in_s = 1.0 * (stop - start) / 1000; - LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 - << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; - - uint8_t last_page_b; - TF_RETURN_IF_ERROR(client_->ReadByte(&last_page_b)); - - last_page_ = !last_page_b; + int32_t page_size = res_len - kScanQueryResHeaderLength; - return Status::OK(); + return ReceivePage(page_size); } Status IgniteDatasetIterator::LoadNextPage() { - TF_RETURN_IF_ERROR(client_->WriteInt(18)); // Message length - TF_RETURN_IF_ERROR( - client_->WriteShort(load_next_page_opcode)); // Operation code + TF_RETURN_IF_ERROR(client_->WriteInt(kLoadNextPageReqLength)); + TF_RETURN_IF_ERROR(client_->WriteShort(kLoadNextPageOpcode)); TF_RETURN_IF_ERROR(client_->WriteLong(0)); // Request ID TF_RETURN_IF_ERROR(client_->WriteLong(cursor_id_)); // Cursor ID - int64_t wait_start = std::chrono::duration_cast( - 
std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 wait_start = Env::Default()->NowMicros(); int32_t res_len; TF_RETURN_IF_ERROR(client_->ReadInt(&res_len)); + uint64 wait_stop = Env::Default()->NowMicros(); - int64_t wait_stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); + LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) / 1000 + << " ms"; - LOG(INFO) << "Load Next Page waited " << (wait_stop - wait_start) << " ms"; - - if (res_len < 12) - return errors::Internal("Load Next Page Response is corrupted"); + if (res_len < kMinResLength) + return errors::Unknown("Load Next Page Response is corrupted"); int64_t req_id; TF_RETURN_IF_ERROR(client_->ReadLong(&req_id)); @@ -380,41 +356,40 @@ Status IgniteDatasetIterator::LoadNextPage() { uint8_t err_msg_header; TF_RETURN_IF_ERROR(client_->ReadByte(&err_msg_header)); - if (err_msg_header == string_val) { + if (err_msg_header == kStringVal) { int32_t err_msg_length; TF_RETURN_IF_ERROR(client_->ReadInt(&err_msg_length)); uint8_t* err_msg_c = new uint8_t[err_msg_length]; + auto clean = gtl::MakeCleanup([err_msg_c] { delete[] err_msg_c; }); TF_RETURN_IF_ERROR(client_->ReadData(err_msg_c, err_msg_length)); - std::string err_msg((char*)err_msg_c, err_msg_length); - delete[] err_msg_c; + string err_msg(reinterpret_cast(err_msg_c), err_msg_length); - return errors::Internal("Load Next Page Error [status=", status, - ", message=", err_msg, "]"); + return errors::Unknown("Load Next Page Error [status=", status, + ", message=", err_msg, "]"); } - return errors::Internal("Load Next Page Error [status=", status, "]"); + return errors::Unknown("Load Next Page Error [status=", status, "]"); } int32_t row_cnt; TF_RETURN_IF_ERROR(client_->ReadInt(&row_cnt)); - remainder_ = res_len - 17; + int32_t page_size = res_len - kLoadNextPageResHeaderLength; + + return ReceivePage(page_size); +} + +Status IgniteDatasetIterator::ReceivePage(int32_t page_size) 
{ + remainder_ = page_size; page_ = std::unique_ptr(new uint8_t[remainder_]); ptr_ = page_.get(); - int64_t start = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - + uint64 start = Env::Default()->NowMicros(); TF_RETURN_IF_ERROR(client_->ReadData(ptr_, remainder_)); - - int64_t stop = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()) - .count(); - ; + uint64 stop = Env::Default()->NowMicros(); double size_in_mb = 1.0 * remainder_ / 1024 / 1024; - double time_in_s = 1.0 * (stop - start) / 1000; + double time_in_s = 1.0 * (stop - start) / 1000 / 1000; LOG(INFO) << "Page size " << size_in_mb << " Mb, time " << time_in_s * 1000 << " ms download speed " << size_in_mb / time_in_s << " Mb/sec"; @@ -426,7 +401,19 @@ Status IgniteDatasetIterator::LoadNextPage() { return Status::OK(); } -int32_t IgniteDatasetIterator::JavaHashCode(std::string str) const { +Status IgniteDatasetIterator::CheckTypes(const std::vector& types) { + if (schema_.size() != types.size()) + return errors::Unknown("Object has unexpected schema"); + + for (size_t i = 0; i < schema_.size(); i++) { + if (schema_[i] != types[permutation_[i]]) + return errors::Unknown("Object has unexpected schema"); + } + + return Status::OK(); +} + +int32_t IgniteDatasetIterator::JavaHashCode(string str) const { int32_t h = 0; for (char& c : str) { h = 31 * h + c; diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h index 5858dbfcb9..c499e2c9cc 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_iterator.h @@ -13,19 +13,22 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_binary_object_parser.h" -#include "ignite_client.h" -#include "ignite_dataset.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" +#include "tensorflow/core/platform/mutex.h" namespace tensorflow { class IgniteDatasetIterator : public DatasetIterator { public: - IgniteDatasetIterator(const Params& params, std::string host, int32 port, - std::string cache_name, bool local, int32 part, - int32 page_size, std::string username, - std::string password, std::string certfile, - std::string keyfile, std::string cert_password, + IgniteDatasetIterator(const Params& params, string host, int32 port, + string cache_name, bool local, int32 part, + int32 page_size, string username, string password, + string certfile, string keyfile, string cert_password, std::vector schema, std::vector permutation); ~IgniteDatasetIterator(); @@ -38,15 +41,28 @@ class IgniteDatasetIterator : public DatasetIterator { IteratorStateReader* reader) override; private: + Status GetNextInternalWithValidState(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence); + + Status EstablishConnection(); + Status CloseConnection(); + Status Handshake(); + Status ScanQuery(); + Status LoadNextPage(); + Status ReceivePage(int32_t page_size); + Status CheckTypes(const std::vector& types); + int32_t JavaHashCode(string str) const; + std::unique_ptr client_; BinaryObjectParser parser_; - const std::string cache_name_; + const string cache_name_; const bool local_; const int32 part_; const int32 page_size_; - const std::string username_; - const std::string password_; + const string username_; + const string 
password_; const std::vector schema_; const std::vector permutation_; @@ -54,24 +70,30 @@ class IgniteDatasetIterator : public DatasetIterator { int64_t cursor_id_; bool last_page_; + bool valid_state_; + + mutex mutex_; + std::unique_ptr page_; uint8_t* ptr_; - - Status EstablishConnection(); - Status CloseConnection(); - Status Handshake(); - Status ScanQuery(); - Status LoadNextPage(); - int32_t JavaHashCode(std::string str) const; }; -constexpr uint8_t null_val = 101; -constexpr uint8_t string_val = 9; -constexpr uint8_t protocol_major_version = 1; -constexpr uint8_t protocol_minor_version = 1; -constexpr uint8_t protocol_patch_version = 0; -constexpr int16_t scan_query_opcode = 2000; -constexpr int16_t load_next_page_opcode = 2001; -constexpr int16_t close_connection_opcode = 0; +constexpr uint8_t kNullVal = 101; +constexpr uint8_t kStringVal = 9; +constexpr uint8_t kProtocolMajorVersion = 1; +constexpr uint8_t kProtocolMinorVersion = 1; +constexpr uint8_t kProtocolPatchVersion = 0; +constexpr int16_t kScanQueryOpcode = 2000; +constexpr int16_t kLoadNextPageOpcode = 2001; +constexpr int16_t kCloseConnectionOpcode = 0; +constexpr int32_t kScanQueryReqLength = 25; +constexpr int32_t kScanQueryResHeaderLength = 25; +constexpr int32_t kLoadNextPageReqLength = 18; +constexpr int32_t kLoadNextPageResHeaderLength = 17; +constexpr int32_t kCloseConnectionReqLength = 18; +constexpr int32_t kHandshakeReqDefaultLength = 8; +constexpr int32_t kMinResLength = 12; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_DATASET_ITERATOR_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index d03404a460..eeb29ef30b 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,29 +13,73 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include -#include "ignite_dataset.h" +#include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { namespace { +Status SchemaToTypes(const std::vector& schema, DataTypeVector* dtypes) { + for (auto e : schema) { + if (e == BYTE || e == BYTE_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == SHORT || e == SHORT_ARR) { + dtypes->push_back(DT_INT16); + } else if (e == INT || e == INT_ARR) { + dtypes->push_back(DT_INT32); + } else if (e == LONG || e == LONG_ARR) { + dtypes->push_back(DT_INT64); + } else if (e == FLOAT || e == FLOAT_ARR) { + dtypes->push_back(DT_FLOAT); + } else if (e == DOUBLE || e == DOUBLE_ARR) { + dtypes->push_back(DT_DOUBLE); + } else if (e == USHORT || e == USHORT_ARR) { + dtypes->push_back(DT_UINT8); + } else if (e == BOOL || e == BOOL_ARR) { + dtypes->push_back(DT_BOOL); + } else if (e == STRING || e == STRING_ARR) { + dtypes->push_back(DT_STRING); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + +Status SchemaToShapes(const std::vector& schema, + std::vector* shapes) { + for (auto e : schema) { + if (e >= 1 && e < 10) { + shapes->push_back(PartialTensorShape({})); + } else if (e >= 12 && e < 21) { + shapes->push_back(PartialTensorShape({-1})); + } else { + return errors::Unknown("Unexpected type in schema [type_id=", e, "]"); + } + } + + return Status::OK(); +} + class IgniteDatasetOp : public DatasetOpKernel { public: using DatasetOpKernel::DatasetOpKernel; void MakeDataset(OpKernelContext* ctx, DatasetBase** output) override { - std::string cache_name = ""; - std::string host = ""; + string cache_name = ""; + string host = ""; int32 port = -1; bool local = false; int32 part = -1; int32 page_size = -1; - std::string username = ""; - 
std::string password = ""; - std::string certfile = ""; - std::string keyfile = ""; - std::string cert_password = ""; + string username = ""; + string password = ""; + string certfile = ""; + string keyfile = ""; + string cert_password = ""; const char* env_cache_name = std::getenv("IGNITE_DATASET_CACHE_NAME"); const char* env_host = std::getenv("IGNITE_DATASET_HOST"); @@ -50,15 +94,15 @@ class IgniteDatasetOp : public DatasetOpKernel { const char* env_cert_password = std::getenv("IGNITE_DATASET_CERT_PASSWORD"); if (env_cache_name) - cache_name = std::string(env_cache_name); + cache_name = string(env_cache_name); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cache_name", - &cache_name)); + OP_REQUIRES_OK( + ctx, ParseScalarArgument(ctx, "cache_name", &cache_name)); if (env_host) - host = std::string(env_host); + host = string(env_host); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "host", &host)); if (env_port) port = atoi(env_port); @@ -82,34 +126,34 @@ class IgniteDatasetOp : public DatasetOpKernel { ParseScalarArgument(ctx, "page_size", &page_size)); if (env_username) - username = std::string(env_username); + username = string(env_username); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "username", &username)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "username", &username)); if (env_password) - password = std::string(env_password); + password = string(env_password); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "password", &password)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "password", &password)); if (env_certfile) - certfile = std::string(env_certfile); + certfile = string(env_certfile); else - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "certfile", &certfile)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "certfile", &certfile)); if (env_keyfile) - keyfile = std::string(env_keyfile); + keyfile = string(env_keyfile); else - OP_REQUIRES_OK( 
- ctx, ParseScalarArgument(ctx, "keyfile", &keyfile)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "keyfile", &keyfile)); if (env_cert_password) - cert_password = std::string(env_cert_password); + cert_password = string(env_cert_password); else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", - &cert_password)); + OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", + &cert_password)); const Tensor* schema_tensor; OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); @@ -124,19 +168,28 @@ class IgniteDatasetOp : public DatasetOpKernel { const Tensor* permutation_tensor; OP_REQUIRES_OK(ctx, ctx->input("permutation", &permutation_tensor)); - OP_REQUIRES(ctx, schema_tensor->dims() == 1, + OP_REQUIRES(ctx, permutation_tensor->dims() == 1, errors::InvalidArgument("`permutation` must be a vector.")); std::vector permutation; - permutation.reserve(permutation_tensor->NumElements()); + permutation.resize(permutation_tensor->NumElements()); for (int i = 0; i < permutation_tensor->NumElements(); i++) { - permutation.push_back(permutation_tensor->flat()(i)); + // Inversed permutation. 
+ permutation[permutation_tensor->flat()(i)] = i; } - *output = - new IgniteDataset(ctx, cache_name, host, port, local, part, page_size, - username, password, certfile, keyfile, cert_password, - std::move(schema), std::move(permutation)); + DataTypeVector dtypes; + std::vector shapes; + + OP_REQUIRES_OK(ctx, SchemaToTypes(schema, &dtypes)); + OP_REQUIRES_OK(ctx, SchemaToShapes(schema, &shapes)); + + *output = new IgniteDataset( + ctx, std::move(cache_name), std::move(host), port, local, part, + page_size, std::move(username), std::move(password), + std::move(certfile), std::move(keyfile), std::move(cert_password), + std::move(schema), std::move(permutation), std::move(dtypes), + std::move(shapes)); } }; diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 6f417a3cb5..750ebe605a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -13,28 +13,31 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_client.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ -#include +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" namespace tensorflow { class PlainClient : public Client { public: - PlainClient(std::string host, int port); + PlainClient(string host, int port, bool big_endian); ~PlainClient(); virtual Status Connect(); virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, int32_t length); - virtual Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length); + virtual Status WriteData(const uint8_t* buf, const int32_t length); private: - const std::string host_; + const string host_; const int port_; int sock_; }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_PLAIN_CLIENT_H_ diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc index a4c58a9563..e16c92307d 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_unix.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" #include #include @@ -31,8 +31,8 @@ limitations under the License. 
namespace tensorflow { -PlainClient::PlainClient(std::string host, int port) - : host_(host), port_(port), sock_(-1) {} +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), host_(std::move(host)), port_(port), sock_(-1) {} PlainClient::~PlainClient() { if (IsConnected()) { @@ -87,7 +87,7 @@ bool PlainClient::IsConnected() { return sock_ != -1; } int PlainClient::GetSocketDescriptor() { return sock_; } -Status PlainClient::ReadData(uint8_t* buf, int32_t length) { +Status PlainClient::ReadData(uint8_t* buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -95,7 +95,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) { if (res < 0) return errors::Internal("Error occured while reading from socket: ", res, - ", ", std::string(strerror(errno))); + ", ", string(strerror(errno))); if (res == 0) return errors::Internal("Server closed connection"); @@ -106,7 +106,7 @@ Status PlainClient::ReadData(uint8_t* buf, int32_t length) { return Status::OK(); } -Status PlainClient::WriteData(uint8_t* buf, int32_t length) { +Status PlainClient::WriteData(const uint8_t* buf, const int32_t length) { int sent = 0; while (sent < length) { @@ -114,7 +114,7 @@ Status PlainClient::WriteData(uint8_t* buf, int32_t length) { if (res < 0) return errors::Internal("Error occured while writing into socket: ", res, - ", ", std::string(strerror(errno))); + ", ", string(strerror(errno))); sent += res; buf += res; diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 8182fde6d9..9cd08a7779 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_plain_client.h" +#include "tensorflow/contrib/ignite/kernels/ignite_plain_client.h" #define WIN32_LEAN_AND_MEAN #include @@ -29,8 +29,11 @@ limitations under the License. namespace tensorflow { -PlainClient::PlainClient(std::string host, int port) - : host_(host), port_(port), sock_(INVALID_SOCKET) {} +PlainClient::PlainClient(string host, int port, bool big_endian) + : Client(big_endian), + host_(std::move(host)), + port_(port), + sock_(INVALID_SOCKET) {} PlainClient::~PlainClient() { if (IsConnected()) { @@ -55,6 +58,8 @@ Status PlainClient::Connect() { &result); if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); + auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); }); + for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); if (sock_ == INVALID_SOCKET) { @@ -72,8 +77,6 @@ Status PlainClient::Connect() { break; } - freeaddrinfo(result); - if (sock_ == INVALID_SOCKET) { WSACleanup(); return errors::Internal("Unable to connect to server"); @@ -99,7 +102,7 @@ bool PlainClient::IsConnected() { return sock_ != INVALID_SOCKET; } int PlainClient::GetSocketDescriptor() { return sock_; } -Status PlainClient::ReadData(uint8_t *buf, int32_t length) { +Status PlainClient::ReadData(uint8_t *buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -117,7 +120,7 @@ Status PlainClient::ReadData(uint8_t *buf, int32_t length) { return Status::OK(); } -Status PlainClient::WriteData(uint8_t *buf, int32_t length) { +Status PlainClient::WriteData(const uint8_t *buf, const int32_t length) { int sent = 0; while (sent < length) { diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc index a2bc6b9609..28db509eaa 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc +++ 
b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "ignite_ssl_wrapper.h" +#include "tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" @@ -29,13 +29,15 @@ static int PasswordCb(char *buf, int size, int rwflag, void *password) { return (strlen(buf)); } -SslWrapper::SslWrapper(std::shared_ptr client, std::string certfile, - std::string keyfile, std::string cert_password) - : client_(client), - certfile_(certfile), - keyfile_(keyfile), - cert_password_(cert_password), - ctx_(NULL) {} +SslWrapper::SslWrapper(std::shared_ptr client, string certfile, + string keyfile, string cert_password, bool big_endian) + : Client(big_endian), + client_(client), + certfile_(std::move(certfile)), + keyfile_(std::move(keyfile)), + cert_password_(std::move(cert_password)), + ctx_(nullptr), + ssl_(nullptr) {} SslWrapper::~SslWrapper() { if (IsConnected()) { @@ -43,9 +45,14 @@ SslWrapper::~SslWrapper() { if (!status.ok()) LOG(WARNING) << status.ToString(); } - if (ctx_ != NULL) { + if (ctx_ != nullptr) { SSL_CTX_free(ctx_); - ctx_ = NULL; + ctx_ = nullptr; + } + + if (ssl_ != nullptr) { + SSL_free(ssl_); + ssl_ = nullptr; } } @@ -63,7 +70,7 @@ Status SslWrapper::InitSslContext() { return errors::Internal("Couldn't load cetificate chain (file '", certfile_, "')"); - std::string private_key_file = keyfile_.empty() ? certfile_ : keyfile_; + string private_key_file = keyfile_.empty() ? 
certfile_ : keyfile_; if (SSL_CTX_use_PrivateKey_file(ctx_, private_key_file.c_str(), SSL_FILETYPE_PEM) != 1) return errors::Internal("Couldn't load private key (file '", @@ -94,6 +101,7 @@ Status SslWrapper::Connect() { Status SslWrapper::Disconnect() { SSL_free(ssl_); + ssl_ = nullptr; LOG(INFO) << "SSL connection closed"; @@ -104,7 +112,7 @@ bool SslWrapper::IsConnected() { return client_->IsConnected(); } int SslWrapper::GetSocketDescriptor() { return client_->GetSocketDescriptor(); } -Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { +Status SslWrapper::ReadData(uint8_t *buf, const int32_t length) { int recieved = 0; while (recieved < length) { @@ -123,7 +131,7 @@ Status SslWrapper::ReadData(uint8_t *buf, int32_t length) { return Status::OK(); } -Status SslWrapper::WriteData(uint8_t *buf, int32_t length) { +Status SslWrapper::WriteData(const uint8_t *buf, const int32_t length) { int sent = 0; while (sent < length) { diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index bbba6cc181..d59ce91aba 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -13,35 +13,39 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "ignite_client.h" +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ + +#include "tensorflow/contrib/ignite/kernels/ignite_client.h" #include -#include namespace tensorflow { class SslWrapper : public Client { public: - SslWrapper(std::shared_ptr client, std::string certfile, - std::string keyfile, std::string cert_password); + SslWrapper(std::shared_ptr client, string certfile, string keyfile, + string cert_password, bool big_endian); ~SslWrapper(); virtual Status Connect(); virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, int32_t length); - virtual Status WriteData(uint8_t* buf, int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length); + virtual Status WriteData(const uint8_t* buf, const int32_t length); private: + Status InitSslContext(); + std::shared_ptr client_; - std::string certfile_; - std::string keyfile_; - std::string cert_password_; + string certfile_; + string keyfile_; + string cert_password_; SSL_CTX* ctx_; SSL* ssl_; - - Status InitSslContext(); }; } // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_SSL_WRAPPER_H_ \ No newline at end of file diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index fb16b290b1..7d18df11aa 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -37,6 +37,8 @@ REGISTER_OP("IgniteDataset") .SetIsStateful() .SetShapeFn(shape_inference::ScalarShape) .Doc(R"doc( +IgniteDataset that allows to get data from Apache Ignite. + Apache Ignite is a memory-centric distributed database, caching, and processing platform for transactional, analytical, and streaming workloads, delivering in-memory speeds at petabyte scale. 
This contrib package contains an diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index 60003ca3b7..c0e24b1c69 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -41,19 +41,19 @@ class Readable(): def read_byte(self): """Reads and returnes byte.""" - return self.__read("b", 1) + return self._read("b", 1) def read_short(self): """Reads and returns short (2 bytes, little-endian).""" - return self.__read("h", 2) + return self._read("h", 2) def read_int(self): """Reads and returns int (4 bytes, little-endian).""" - return self.__read("i", 4) + return self._read("i", 4) def read_long(self): """Reads and returns long (8 bytes, little-endian).""" - return self.__read("q", 8) + return self._read("q", 8) def skip(self, length): """Skips the specified number of bytes.""" @@ -64,7 +64,7 @@ class Readable(): """Reads the specified number of bytes and returns them as a buffer.""" return None - def __read(self, data_type, length): + def _read(self, data_type, length): """Reads, unpacks and returns specified type (little-endian).""" data_buffer = self.read_data(length) return struct.unpack("<" + data_type, data_buffer)[0] @@ -116,10 +116,10 @@ class TcpClient(Readable): self.sock = context.wrap_socket(self.sock) else: if keyfile is not None: - raise Exception("SSL is disabled, keyfile must not be specified \ + raise RuntimeError("SSL is disabled, keyfile must not be specified \ (to enable SSL specify certfile)") if password is not None: - raise Exception("SSL is disabled, password must not be specified \ + raise RuntimeError("SSL is disabled, password must not be specified \ (to enable SSL specify certfile)") self.host = host @@ -136,19 +136,19 @@ class TcpClient(Readable): def write_byte(self, v): """Writes the specified byte.""" - self.__write(v, "b") + self._write(v, "b") def write_short(self, v): """Writes 
the specified short (2 bytes, little-endian).""" - self.__write(v, "h") + self._write(v, "h") def write_int(self, v): """Writes the specified short (4 bytes, little-endian).""" - self.__write(v, "i") + self._write(v, "i") def write_long(self, v): """Writes the specified int (8 bytes, little-endian).""" - self.__write(v, "q") + self._write(v, "q") def write_string(self, v): """Writes the specified string.""" @@ -167,7 +167,7 @@ class TcpClient(Readable): data_buffer += buf return data_buffer - def __write(self, value, data_type): + def _write(self, value, data_type): """Packs and writes data using the specified type (little-endian).""" data_buffer = struct.pack("<" + data_type, value) self.sock.sendall(data_buffer) @@ -193,6 +193,7 @@ class BinaryField(): # Binary types defined in Apache Ignite Thin client and supported by # TensorFlow on Apache Ignite, see # https://apacheignite.readme.io/v2.6/docs/binary-client-protocol. +# True means that type is a vector, False means type is scalar. types = { 1: (dtypes.uint8, False), 2: (dtypes.int16, False), @@ -248,13 +249,13 @@ class TypeTreeNode(): dataset. """ if self.fields is None: - object_type = types[self.type_id] - if object_type is not None: + if self.type_id in types: + object_type = types[self.type_id] is_array = object_type[1] if is_array: return tensor_shape.TensorShape([None]) return tensor_shape.TensorShape([]) - raise Exception("Unsupported type [type_id=%d]" % self.type_id) + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) output_shapes = {} for field in self.fields: output_shapes[field.name] = field.to_output_shapes() @@ -265,10 +266,10 @@ class TypeTreeNode(): dataset. 
""" if self.fields is None: - object_type = types[self.type_id] - if object_type is not None: + if self.type_id in types: + object_type = types[self.type_id] return object_type[0] - raise Exception("Unsupported type [type_id=%d]" % self.type_id) + raise ValueError("Unsupported type [type_id=%d]" % self.type_id) else: output_types = {} for field in self.fields: @@ -276,11 +277,11 @@ class TypeTreeNode(): return output_types def to_flat(self): - """Returns a list of leaf node types.""" + """Returns a list of node types.""" return self.to_flat_rec([]) def to_permutation(self): - """Returns a permutation that should be applied to order object leafs.""" + """Returns a permutation that should be applied to order object leaves.""" correct_order_dict = {} self.traversal_rec(correct_order_dict, 0) object_order = [] @@ -288,9 +289,10 @@ class TypeTreeNode(): return [correct_order_dict[o] for o in object_order] def to_flat_rec(self, flat): - """Formats a list of leaf node types.""" - flat.append(self.type_id) - if self.fields is not None: + """Formats a list of leaf node types in pre-order.""" + if self.fields is None: + flat.append(self.type_id) + else: for field in self.fields: field.to_flat_rec(flat) return flat @@ -320,8 +322,8 @@ class IgniteClient(TcpClient): have the same structure (homogeneous objects) and the cache contains at least one object. """ - def __init__(self, host, port, username=None, password=None, certfile=None,\ - keyfile=None, cert_password=None): + def __init__(self, host, port, username=None, password=None, certfile=None, + keyfile=None, cert_password=None): """Constructs a new instance of IgniteClient. 
Args: @@ -385,12 +387,13 @@ class IgniteClient(TcpClient): serv_ver_major = self.read_short() serv_ver_minor = self.read_short() serv_ver_patch = self.read_short() - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Handshake Error [result=%d, version=%d.%d.%d]" \ - % (res, serv_ver_major, serv_ver_minor, serv_ver_patch)) + raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d]" + % (res, serv_ver_major, serv_ver_minor, + serv_ver_patch)) else: - raise Exception("Handshake Error [result=%d, version=%d.%d.%d, \ + raise RuntimeError("Handshake Error [result=%d, version=%d.%d.%d, \ message='%s']" % ( res, serv_ver_major, @@ -403,7 +406,7 @@ class IgniteClient(TcpClient): """Collects type information about objects stored in the specified cache. """ - cache_name_hash = self.__java_hash_code(cache_name) + cache_name_hash = self._java_hash_code(cache_name) self.write_int(25) # Message length self.write_short(2000) # Operation code self.write_long(0) # Request ID @@ -419,18 +422,18 @@ class IgniteClient(TcpClient): status = self.read_int() if status != 0: - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Scan Query Error [status=%s]" % status) + raise RuntimeError("Scan Query Error [status=%s]" % status) else: - raise Exception("Scan Query Error [status=%s, message='%s']" \ - % (status, err_msg)) + raise RuntimeError("Scan Query Error [status=%s, message='%s']" + % (status, err_msg)) self.read_long() # Cursor id row_count = self.read_int() if row_count == 0: - raise Exception("Scan Query returned empty result, so it's \ + raise RuntimeError("Scan Query returned empty result, so it's \ impossible to derive the cache type") payload = DataBuffer(self.read_data(result_length - 25)) @@ -438,20 +441,20 @@ class IgniteClient(TcpClient): self.read_byte() # Next page res = TypeTreeNode("root", 0, [ - self.__collect_types("key", payload), - 
self.__collect_types("val", payload) + self._collect_types("key", payload), + self._collect_types("val", payload) ], [0, 1]) return res - def __java_hash_code(self, s): + def _java_hash_code(self, s): """Computes hash code of the specified string using Java code.""" h = 0 for c in s: h = (31 * h + ord(c)) & 0xFFFFFFFF return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000 - def __collect_types(self, field_name, data): + def _collect_types(self, field_name, data): """Extracts type information from the specified object.""" type_id = data.read_byte() @@ -570,7 +573,7 @@ class IgniteClient(TcpClient): elif header == 101: pass else: - raise Exception("Unknown binary type when expected string \ + raise RuntimeError("Unknown binary type when expected string \ [type_id=%d]" % header) return TypeTreeNode(field_name, type_id) @@ -591,7 +594,7 @@ class IgniteClient(TcpClient): length = data.read_int() inner_data = data.read_data(length) data.read_int() # Offset - return self.__collect_types(field_name, DataBuffer(inner_data)) + return self._collect_types(field_name, DataBuffer(inner_data)) # Complex Object. 
if type_id == 103: @@ -603,11 +606,11 @@ class IgniteClient(TcpClient): data.read_int() # Object schema id obj_schema_offset = data.read_int() - obj_type = self.__get_type(obj_type_id) + obj_type = self._get_type(obj_type_id) children = [] for obj_field in obj_type.fields: - child = self.__collect_types(obj_field.field_name, data) + child = self._collect_types(obj_field.field_name, data) children.append(child) children_sorted = sorted(children, key=lambda child: child.name) @@ -618,9 +621,9 @@ class IgniteClient(TcpClient): return TypeTreeNode(field_name, type_id, children, permutation) - raise Exception("Unknown binary type [type_id=%d]" % type_id) + raise RuntimeError("Unknown binary type [type_id=%d]" % type_id) - def __get_type(self, type_id): + def _get_type(self, type_id): """Queries Apache Ignite information about type by type id.""" self.write_int(14) # Message length self.write_short(3002) # Operation code @@ -632,25 +635,25 @@ class IgniteClient(TcpClient): status = self.read_int() if status != 0: - err_msg = self.__parse_string() + err_msg = self._parse_string() if err_msg is None: - raise Exception("Get Binary Type Error [status=%d, message='%s']" \ - % (status, err_msg)) + raise RuntimeError("Get Binary Type Error [status=%d, message='%s']" + % (status, err_msg)) else: - raise Exception("Get Binary Type Error [status=%d]" % status) + raise RuntimeError("Get Binary Type Error [status=%d]" % status) binary_type_exists = self.read_byte() if binary_type_exists == 0: - raise Exception("Binary type not found [type_id=%d] " % type_id) + raise RuntimeError("Binary type not found [type_id=%d] " % type_id) binary_type_id = self.read_int() - binary_type_name = self.__parse_string() - self.__parse_string() # Affinity field name + binary_type_name = self._parse_string() + self._parse_string() # Affinity field name fields = [] for _ in range(self.read_int()): - field_name = self.__parse_string() + field_name = self._parse_string() field_type_id = self.read_int() 
field_id = self.read_int() @@ -659,7 +662,7 @@ class IgniteClient(TcpClient): is_enum = self.read_byte() if is_enum == 1: - raise Exception("Enum fields are not supported yet") + raise RuntimeError("Enum fields are not supported yet") schema_cnt = self.read_int() for _ in range(schema_cnt): @@ -669,7 +672,7 @@ class IgniteClient(TcpClient): return BinaryType(binary_type_id, binary_type_name, fields) - def __parse_string(self): + def _parse_string(self): """Parses string.""" header = self.read_byte() if header == 9: @@ -677,8 +680,8 @@ class IgniteClient(TcpClient): return self.read_data(length).decode("utf-8") if header == 101: return None - raise Exception("Unknown binary type when expected string [type_id=%d]" \ - % header) + raise RuntimeError("Unknown binary type when expected string [type_id=%d]" + % header) class IgniteDataset(Dataset): """Apache Ignite is a memory-centric distributed database, caching, and @@ -692,9 +695,9 @@ class IgniteDataset(Dataset): Ignite Binary Client Protocol. """ - def __init__(self, cache_name, host="localhost", port=10800, local=False,\ - part=-1, page_size=100, username=None, password=None, certfile=None,\ - keyfile=None, cert_password=None): + def __init__(self, cache_name, host="localhost", port=10800, local=False, + part=-1, page_size=100, username=None, password=None, + certfile=None, keyfile=None, cert_password=None): """Create a IgniteDataset. 
Args: @@ -716,39 +719,44 @@ class IgniteDataset(Dataset): """ super(IgniteDataset, self).__init__() - with IgniteClient(host, port, username, password, certfile, keyfile,\ - cert_password) as client: + with IgniteClient(host, port, username, password, certfile, keyfile, + cert_password) as client: client.handshake() self.cache_type = client.get_cache_type(cache_name) - self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string,\ - name="cache_name") + self.cache_name = ops.convert_to_tensor(cache_name, dtype=dtypes.string, + name="cache_name") self.host = ops.convert_to_tensor(host, dtype=dtypes.string, name="host") self.port = ops.convert_to_tensor(port, dtype=dtypes.int32, name="port") self.local = ops.convert_to_tensor(local, dtype=dtypes.bool, name="local") self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part") - self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32,\ - name="page_size") - self.username = ops.convert_to_tensor("" if username is None else username,\ - dtype=dtypes.string, name="username") - self.password = ops.convert_to_tensor("" if password is None else password,\ - dtype=dtypes.string, name="password") - self.certfile = ops.convert_to_tensor("" if certfile is None else certfile,\ - dtype=dtypes.string, name="certfile") - self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile,\ - dtype=dtypes.string, name="keyfile") - self.cert_password = ops.convert_to_tensor("" if cert_password is None\ - else cert_password, dtype=dtypes.string, name="cert_password") - self.schema = ops.convert_to_tensor(self.cache_type.to_flat(),\ - dtype=dtypes.int32, name="schema") - self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(),\ - dtype=dtypes.int32, name="permutation") + self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32, + name="page_size") + self.username = ops.convert_to_tensor("" if username is None else username, + dtype=dtypes.string, name="username") + 
self.password = ops.convert_to_tensor("" if password is None else password, + dtype=dtypes.string, name="password") + self.certfile = ops.convert_to_tensor("" if certfile is None else certfile, + dtype=dtypes.string, name="certfile") + self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile, + dtype=dtypes.string, name="keyfile") + self.cert_password = ops.convert_to_tensor("" if cert_password is None + else cert_password, + dtype=dtypes.string, + name="cert_password") + self.schema = ops.convert_to_tensor(self.cache_type.to_flat(), + dtype=dtypes.int32, name="schema") + self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(), + dtype=dtypes.int32, + name="permutation") def _as_variant_tensor(self): - return gen_dataset_ops.ignite_dataset(self.cache_name, self.host,\ - self.port, self.local, self.part, self.page_size, self.username,\ - self.password, self.certfile, self.keyfile, self.cert_password,\ - self.schema, self.permutation) + return gen_dataset_ops.ignite_dataset(self.cache_name, self.host, + self.port, self.local, self.part, + self.page_size, self.username, + self.password, self.certfile, + self.keyfile, self.cert_password, + self.schema, self.permutation) @property def output_classes(self): -- GitLab From ce9b23070638094022036656e5d1fbf3e23b74c6 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Thu, 13 Sep 2018 11:24:37 +0300 Subject: [PATCH 035/570] Add forgotten ignite_byte_swapper.h --- .../ignite/kernels/ignite_byte_swapper.h | 129 ++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h new file mode 100644 index 0000000000..986bedcf69 --- /dev/null +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ +#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ + +#include + +namespace tensorflow { + +class ByteSwapper { + public: + ByteSwapper(bool big_endian) { + int x = 1; + bool is_little_endian = (*(char *)&x == 1); + swap_ = big_endian == is_little_endian; + } + + inline void SwapIfRequiredInt16(int16_t *x) const { + if (swap_) { + Swap16(x); + } + } + + inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const { + if (swap_) { + Swap16(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt32(int32_t *x) const { + if (swap_) { + Swap32(x); + } + } + + inline void SwapIfRequiredFloat(float *x) const { + if (swap_) { + Swap32(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt64(int64_t *x) const { + if (swap_) { + Swap64(x); + } + } + + inline void SwapIfRequiredDouble(double *x) const { + if (swap_) { + Swap64(reinterpret_cast(x)); + } + } + + inline void SwapIfRequiredInt16Arr(int16_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap16(&x[i]); + } + } + + inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x, + int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap16(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt32Arr(int32_t *x, int32_t length) const { + if (swap_) { + 
for (int32_t i = 0; i < length; i++) Swap32(&x[i]); + } + } + + inline void SwapIfRequiredFloatArr(float *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap32(reinterpret_cast(&x[i])); + } + } + + inline void SwapIfRequiredInt64Arr(int64_t *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) Swap64(&x[i]); + } + } + + inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const { + if (swap_) { + for (int32_t i = 0; i < length; i++) + Swap64(reinterpret_cast(&x[i])); + } + } + + private: + inline void Swap16(int16_t *x) const { + *x = ((*x & 0xFF) << 8) | ((*x >> 8) & 0xFF); + } + + inline void Swap32(int32_t *x) const { + *x = ((*x & 0xFF) << 24) | (((*x >> 8) & 0xFF) << 16) | + (((*x >> 16) & 0xFF) << 8) | ((*x >> 24) & 0xFF); + } + + inline void Swap64(int64_t *x) const { + *x = ((*x & 0xFF) << 56) | (((*x >> 8) & 0xFF) << 48) | + (((*x >> 16) & 0xFF) << 40) | (((*x >> 24) & 0xFF) << 32) | + (((*x >> 32) & 0xFF) << 24) | (((*x >> 40) & 0xFF) << 16) | + (((*x >> 48) & 0xFF) << 8) | ((*x >> 56) & 0xFF); + } + + bool swap_; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ -- GitLab From d797e99a043e01609583a37c04e1e509d126e1a0 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Thu, 13 Sep 2018 09:42:16 +0000 Subject: [PATCH 036/570] Fix windows build. --- .../contrib/ignite/kernels/ignite_plain_client_windows.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 9cd08a7779..17f2bf45d1 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -24,6 +24,7 @@ limitations under the License. 
#pragma comment(lib, "Mswsock.lib") #pragma comment(lib, "AdvApi32.lib") +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/platform/logging.h" @@ -58,7 +59,7 @@ Status PlainClient::Connect() { &result); if (res != 0) return errors::Internal("Getaddrinfo failed with error: ", res); - auto clean = gtl::MakeCleanup([result] { reeaddrinfo(result); }); + auto clean = gtl::MakeCleanup([result] { freeaddrinfo(result); }); for (ptr = result; ptr != NULL; ptr = ptr->ai_next) { sock_ = socket(ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol); -- GitLab From c8b60b894b91cfdb4176176d7dcf328d2b40b41f Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Thu, 13 Sep 2018 16:34:59 +0300 Subject: [PATCH 037/570] Fix code style. --- .../ignite/kernels/ignite_byte_swapper.h | 18 +++++++++--------- .../ignite/kernels/ignite_dataset_ops.cc | 2 +- .../kernels/ignite_plain_client_windows.cc | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 986bedcf69..5b42de4c5a 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -36,7 +36,7 @@ class ByteSwapper { inline void SwapIfRequiredUnsignedInt16(uint16_t *x) const { if (swap_) { - Swap16(reinterpret_cast(x)); + Swap16(reinterpret_cast(x)); } } @@ -48,7 +48,7 @@ class ByteSwapper { inline void SwapIfRequiredFloat(float *x) const { if (swap_) { - Swap32(reinterpret_cast(x)); + Swap32(reinterpret_cast(x)); } } @@ -60,7 +60,7 @@ class ByteSwapper { inline void SwapIfRequiredDouble(double *x) const { if (swap_) { - Swap64(reinterpret_cast(x)); + Swap64(reinterpret_cast(x)); } } @@ -73,8 +73,8 @@ class ByteSwapper { inline void SwapIfRequiredUnsignedInt16Arr(uint16_t *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - 
Swap16(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap16(reinterpret_cast(&x[i])); } } @@ -86,8 +86,8 @@ class ByteSwapper { inline void SwapIfRequiredFloatArr(float *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - Swap32(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap32(reinterpret_cast(&x[i])); } } @@ -99,8 +99,8 @@ class ByteSwapper { inline void SwapIfRequiredDoubleArr(double *x, int32_t length) const { if (swap_) { - for (int32_t i = 0; i < length; i++) - Swap64(reinterpret_cast(&x[i])); + for (int32_t i = 0; i < length; i++) + Swap64(reinterpret_cast(&x[i])); } } diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index eeb29ef30b..e48fce4ed2 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include #include "tensorflow/contrib/ignite/kernels/ignite_binary_object_parser.h" +#include "tensorflow/contrib/ignite/kernels/ignite_dataset.h" #include "tensorflow/core/framework/dataset.h" namespace tensorflow { diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc index 17f2bf45d1..43d6108c34 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client_windows.cc @@ -24,8 +24,8 @@ limitations under the License. 
#pragma comment(lib, "Mswsock.lib") #pragma comment(lib, "AdvApi32.lib") -#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/platform/logging.h" namespace tensorflow { -- GitLab From c513c04aed8790c78c46b78f90ec848555498ce4 Mon Sep 17 00:00:00 2001 From: dmitrievanthony Date: Thu, 13 Sep 2018 15:13:54 +0000 Subject: [PATCH 038/570] Add -DWIN32_LEAN_AND_MEAN option into BUILD. --- tensorflow/contrib/ignite/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index 2f598b4aed..1adc6c6ccc 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -61,6 +61,9 @@ cc_library( "@boringssl//:ssl", "@protobuf_archive//:protobuf_headers", ], + copts = if_windows([ + "-DWIN32_LEAN_AND_MEAN", + ]), alwayslink = 1, ) -- GitLab From f54856b1448bed24534189e4aa2ebb9d0b4f5b9a Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Thu, 13 Sep 2018 18:13:47 +0000 Subject: [PATCH 039/570] Apply buildifier changes. 
--- tensorflow/contrib/ignite/BUILD | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/ignite/BUILD b/tensorflow/contrib/ignite/BUILD index 1adc6c6ccc..9393b702d1 100644 --- a/tensorflow/contrib/ignite/BUILD +++ b/tensorflow/contrib/ignite/BUILD @@ -6,14 +6,14 @@ exports_files(["LICENSE"]) load( "//tensorflow:tensorflow.bzl", - "tf_gen_op_wrapper_py", - "tf_kernel_library", + "if_not_windows", + "if_windows", "tf_custom_op_library", "tf_custom_op_py_library", "tf_gen_op_libs", + "tf_gen_op_wrapper_py", + "tf_kernel_library", "tf_py_test", - "if_not_windows", - "if_windows", ) py_library( @@ -55,15 +55,15 @@ cc_library( ]) + if_windows([ "kernels/ignite_plain_client_windows.cc", ]), + copts = if_windows([ + "-DWIN32_LEAN_AND_MEAN", + ]), deps = [ "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", "@boringssl//:ssl", "@protobuf_archive//:protobuf_headers", ], - copts = if_windows([ - "-DWIN32_LEAN_AND_MEAN", - ]), alwayslink = 1, ) -- GitLab From 74b9d6a48286f38807bbd204d9d55467e02387ca Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Fri, 14 Sep 2018 16:25:36 -0700 Subject: [PATCH 040/570] [Intel MKL] Fixes for unit test failures 1) Changes in partitioned_function_ops.cc are for passing Global OpRegistry as default_registry in PartitionedFunction op This fix addresses failure in MKL layout pass when PartitionedFunction op calls graph optimization passes. The problem was that the function library definition that is used to create function graph and corresponding subgraphs after partitioning did not use global OpRegistry as the default OpRegistry used for look of operator names. Because of that, standard operators such as "Const" were not available to graph passes. 2) Changes in mkl_cpu_allocator.h are to address failure in mkl_cpu_allocator_test which was expecting that max_bytes_limits is returned via GetStats() in MKLCPUAllocator. 
--- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 3 +++ tensorflow/core/kernels/partitioned_function_ops.cc | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index df9c3a686c..593f855ea2 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -277,6 +277,9 @@ class MklCPUAllocator : public VisitableAllocator { // max_alloc_size from large_size_allocator would be the maximum // size allocated by MklCPUAllocator. stats->max_alloc_size = l_stats.max_alloc_size; + + stats->bytes_limit = + std::max(s_stats.bytes_limit, l_stats.bytes_limit); } void ClearStats() override { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index fc1c9003aa..ddb621967a 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -97,7 +97,12 @@ class PartitionedCallOp : public AsyncOpKernel { OP_REQUIRES_ASYNC(ctx, fbody != nullptr, errors::Internal("Could not find handle ", handle), done); - auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); + // We need to pass global op_registry as default_registry when creating + // graph. So that graph optimization passes can lookup all possible ops + // by name. 
+ FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), + fbody->graph->flib_def().ToProto()); + auto graph = tensorflow::MakeUnique(func_lib_def); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -250,9 +255,10 @@ class PartitionedCallOp : public AsyncOpKernel { VLOG(3) << "Partitioned function '" << func_.name() << "', yielding " << partitions.size() << " shards."; - const FunctionLibraryDefinition* flib_def = &graph->flib_def(); + FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), + graph->flib_def().ToProto()); for (const auto& partition : partitions) { - std::unique_ptr subgraph(new Graph(flib_def)); + std::unique_ptr subgraph(new Graph(func_lib_def)); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = true; -- GitLab From fa80a920f2a3bc00522fe95fc9a07a28d67fc055 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Mon, 17 Sep 2018 12:50:18 +0300 Subject: [PATCH 041/570] Add 'override' specifier to ReadData, WriteData. 
--- tensorflow/contrib/ignite/kernels/ignite_plain_client.h | 4 ++-- tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h index 750ebe605a..d12d56fdc1 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_plain_client.h +++ b/tensorflow/contrib/ignite/kernels/ignite_plain_client.h @@ -29,8 +29,8 @@ class PlainClient : public Client { virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, const int32_t length); - virtual Status WriteData(const uint8_t* buf, const int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length) override; + virtual Status WriteData(const uint8_t* buf, const int32_t length) override; private: const string host_; diff --git a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h index d59ce91aba..372156a757 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_ssl_wrapper.h @@ -32,8 +32,8 @@ class SslWrapper : public Client { virtual Status Disconnect(); virtual bool IsConnected(); virtual int GetSocketDescriptor(); - virtual Status ReadData(uint8_t* buf, const int32_t length); - virtual Status WriteData(const uint8_t* buf, const int32_t length); + virtual Status ReadData(uint8_t* buf, const int32_t length) override; + virtual Status WriteData(const uint8_t* buf, const int32_t length) override; private: Status InitSslContext(); -- GitLab From 7820ead0c58c9d90d7776bea31a294bbcc9a30f8 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 30 Jul 2018 09:46:05 -0500 Subject: [PATCH 042/570] Make full model before calling set_model on callback Commit 1b67ccbe8006eacffd268553abd01310e8b187d6 removed the _make_train_function calls from Keras training 
fit_generator for eager execution. This breaks some callbacks that depend on the entire model to be populated on the set_model or on_train_begin methods. This commit adds the method calls back in but guarded by an eager check. It is not doing a revert / fix because the fix that removed the calls also put a test case in for eager fit_generator testing which we want to retain. --- tensorflow/python/keras/engine/training_generator.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tensorflow/python/keras/engine/training_generator.py b/tensorflow/python/keras/engine/training_generator.py index 413c1f4fba..2e074699da 100644 --- a/tensorflow/python/keras/engine/training_generator.py +++ b/tensorflow/python/keras/engine/training_generator.py @@ -21,6 +21,7 @@ from __future__ import print_function import numpy as np +from tensorflow.python.eager import context from tensorflow.python.keras import callbacks as cbks from tensorflow.python.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras.utils.data_utils import OrderedEnqueuer @@ -48,6 +49,10 @@ def fit_generator(model, epoch = initial_epoch do_validation = bool(validation_data) + if not context.executing_eagerly(): + model._make_train_function() + if do_validation: + model._make_test_function() is_sequence = isinstance(generator, Sequence) if not is_sequence and use_multiprocessing and workers > 1: @@ -233,6 +238,9 @@ def evaluate_generator(model, use_multiprocessing=False, verbose=0): """See docstring for `Model.evaluate_generator`.""" + if not context.executing_eagerly(): + model._make_test_function() + if hasattr(model, 'metrics'): for m in model.stateful_metric_functions: m.reset_states() @@ -342,6 +350,9 @@ def predict_generator(model, use_multiprocessing=False, verbose=0): """See docstring for `Model.predict_generator`.""" + if not context.executing_eagerly(): + model._make_test_function() + steps_done = 0 wait_time = 0.01 all_outs = [] -- GitLab From 
66575e0537ba8952de8ebc45d45d1b9e4ba1b6ba Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Thu, 2 Aug 2018 13:39:48 -0500 Subject: [PATCH 043/570] Add unit test for fit_generator changes Add unit test for fit_generator change for callbacks. --- .../python/keras/engine/training_test.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 30be4131a4..465b4ad65f 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -31,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import metrics as metrics_module +from tensorflow.python.keras import callbacks from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras.utils.generic_utils import slice_arrays @@ -1190,6 +1191,37 @@ class TestGeneratorMethods(test.TestCase): use_multiprocessing=False, workers=0) + def test_fit_generator_with_callback(self): + model = keras.Sequential() + model.add(keras.layers.Dense(4, input_shape=(3,))) + optimizer = RMSPropOptimizer(learning_rate=0.001) + model.compile(optimizer, 'mse', metrics=['mae']) + + x = np.random.random((10, 3)) + y = np.random.random((10, 4)) + + def iterator(): + while 1: + yield x, y + + class TestCallback(callbacks.Callback): + def set_model(self, model): + # Check the model operations for the optimizer operations that + # the _make_train_function adds under a named scope for the + # optimizer. This ensurs the full model is populated before the + # set_model callback is called. 
+ optimizer_name_scope = 'training/TFOptimizer/' + graph_def = ops.get_default_graph().as_graph_def() + for node in graph_def.node: + if node.name.startswith(optimizer_name_scope): + return + raise RuntimeError('The optimizer operations are not present in the ' + 'model graph when the Callback.set_model function ' + 'is called') + + model.fit_generator(iterator(), steps_per_epoch=3, epochs=1, + callbacks=[TestCallback()]) + def test_generator_methods_with_sample_weights(self): arr_data = np.random.random((50, 2)) arr_labels = np.random.random((50,)) -- GitLab From da3ccfda9b75f3cf60eb237d9a4da68c436e9f18 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 11:59:14 -0500 Subject: [PATCH 044/570] Move test to callbacks_test --- tensorflow/python/keras/callbacks_test.py | 40 +++++++++++++++++++ .../python/keras/engine/training_test.py | 31 -------------- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index b6fae19823..28f7614463 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -30,6 +30,7 @@ import numpy as np from tensorflow.core.framework import summary_pb2 from tensorflow.python import keras +from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import test_util from tensorflow.python.keras import testing_utils @@ -1222,6 +1223,45 @@ class KerasCallbacksTest(test.TestCase): callbacks=cbks, epochs=1) + def test_fit_generator_with_callback(self): + + class TestCallback(keras.callbacks.Callback): + def set_model(self, model): + # Check the model operations for the optimizer operations that + # the _make_train_function adds under a named scope for the + # optimizer. This ensurs the full model is populated before the + # set_model callback is called. 
+ optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__ + graph_def = ops.get_default_graph().as_graph_def() + for node in graph_def.node: + if node.name.startswith(optimizer_name_scope): + return + raise RuntimeError('The optimizer operations are not present in the ' + 'model graph when the Callback.set_model function ' + 'is called') + np.random.seed(1337) + + def generator(): + x = np.random.randn(10, 100).astype(np.float32) + y = np.random.randn(10, 10).astype(np.float32) + while True: + yield x, y + + with self.cached_session(): + model = testing_utils.get_small_sequential_mlp( + num_hidden=10, num_classes=10, input_dim=100) + model.compile( + loss='categorical_crossentropy', + optimizer='sgd', + metrics=['accuracy']) + model.fit_generator( + generator(), + steps_per_epoch=2, + epochs=1, + validation_data=generator(), + validation_steps=2, + callbacks=[TestCallback()], + verbose=0) if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index 465b4ad65f..d8510c1f23 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -1191,37 +1191,6 @@ class TestGeneratorMethods(test.TestCase): use_multiprocessing=False, workers=0) - def test_fit_generator_with_callback(self): - model = keras.Sequential() - model.add(keras.layers.Dense(4, input_shape=(3,))) - optimizer = RMSPropOptimizer(learning_rate=0.001) - model.compile(optimizer, 'mse', metrics=['mae']) - - x = np.random.random((10, 3)) - y = np.random.random((10, 4)) - - def iterator(): - while 1: - yield x, y - - class TestCallback(callbacks.Callback): - def set_model(self, model): - # Check the model operations for the optimizer operations that - # the _make_train_function adds under a named scope for the - # optimizer. This ensurs the full model is populated before the - # set_model callback is called. 
- optimizer_name_scope = 'training/TFOptimizer/' - graph_def = ops.get_default_graph().as_graph_def() - for node in graph_def.node: - if node.name.startswith(optimizer_name_scope): - return - raise RuntimeError('The optimizer operations are not present in the ' - 'model graph when the Callback.set_model function ' - 'is called') - - model.fit_generator(iterator(), steps_per_epoch=3, epochs=1, - callbacks=[TestCallback()]) - def test_generator_methods_with_sample_weights(self): arr_data = np.random.random((50, 2)) arr_labels = np.random.random((50,)) -- GitLab From 3fe9c54b6181bc2bbfa535b28ecb7d3b74342bd8 Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 12:13:15 -0500 Subject: [PATCH 045/570] Remove unnecessary import of callbacks --- tensorflow/python/keras/engine/training_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/keras/engine/training_test.py b/tensorflow/python/keras/engine/training_test.py index d8510c1f23..30be4131a4 100644 --- a/tensorflow/python/keras/engine/training_test.py +++ b/tensorflow/python/keras/engine/training_test.py @@ -31,7 +31,6 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util as tf_test_util from tensorflow.python.keras import metrics as metrics_module -from tensorflow.python.keras import callbacks from tensorflow.python.keras import testing_utils from tensorflow.python.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras.utils.generic_utils import slice_arrays -- GitLab From 12718f0204bad8aaa3984c7a176914451eb0bbab Mon Sep 17 00:00:00 2001 From: Samuel Matzek Date: Mon, 17 Sep 2018 13:24:29 -0500 Subject: [PATCH 046/570] Fix pylint error --- tensorflow/python/keras/callbacks_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/callbacks_test.py b/tensorflow/python/keras/callbacks_test.py index 
28f7614463..467bc4cdc4 100644 --- a/tensorflow/python/keras/callbacks_test.py +++ b/tensorflow/python/keras/callbacks_test.py @@ -1234,8 +1234,8 @@ class KerasCallbacksTest(test.TestCase): optimizer_name_scope = 'training/' + model.optimizer.__class__.__name__ graph_def = ops.get_default_graph().as_graph_def() for node in graph_def.node: - if node.name.startswith(optimizer_name_scope): - return + if node.name.startswith(optimizer_name_scope): + return raise RuntimeError('The optimizer operations are not present in the ' 'model graph when the Callback.set_model function ' 'is called') -- GitLab From fbd48c7a8bb088d92988fce4f757d1719e9c57a2 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Mon, 17 Sep 2018 12:24:43 -0700 Subject: [PATCH 047/570] fix type error within an environment variable name --- tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc | 2 +- tensorflow/core/kernels/mkl_conv_grad_input_ops.cc | 2 +- tensorflow/core/kernels/mkl_conv_ops.cc | 2 +- tensorflow/core/util/mkl_util.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 52157ed5fb..f406ad2ab5 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -853,7 +853,7 @@ class MklConvCustomBackpropFilterOp // MKL DNN allocates large buffers when a conv gradient filter primtive is // created. So we don't cache conv backward primitives when the env - // variable TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is set to true. + // variable TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is set to true. 
bool do_not_cache = MklPrimitiveFactory::IsPrimitiveMemOptEnabled(); conv_bwd_filter = MklConvBwdFilterPrimitiveFactory::Get( convBwdFilterDims, do_not_cache); diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index c38c9cc27c..a501ce2c93 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -713,7 +713,7 @@ class MklConvCustomBackpropInputOp : public MklConvBackpropCommonOp { TFPaddingToMklDnnPadding(this->padding_)); // We don't cache those primitves if the env variable - // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true and if primitve descriptor + // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true and if primitve descriptor // includes potentialy large buffers. MKL DNN allocates buffers // in the following cases // 1. Legacy CPU without AVX512/AVX2, or diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 184e0cb003..b332edad0a 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -901,7 +901,7 @@ class MklConvOp : public OpKernel { // In some cases, primitve descriptor includes potentialy large buffers, // we don't cache those primitves if the env variable - // TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE is true. MKL DNN allocates buffers + // TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE is true. MKL DNN allocates buffers // in the following cases // 1. Legacy CPU without AVX512/AVX2, or // 2. 
1x1 convolution with stride != 1 diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 680211edff..5ea8f2ee47 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -2040,7 +2040,7 @@ class MklPrimitiveFactory { /// Fuction to check whether primitive memory optimization is enabled static inline bool IsPrimitiveMemOptEnabled() { bool is_primitive_mem_opt_enabled = true; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITVE_MEMUSE", true, + TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true, &is_primitive_mem_opt_enabled)); return is_primitive_mem_opt_enabled; } -- GitLab From 6d9bb99ea7a697e465ef66dea821a86ca94f845d Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Mon, 17 Sep 2018 17:22:40 -0700 Subject: [PATCH 048/570] Addressing review comments: indentation --- tensorflow/core/common_runtime/mkl_cpu_allocator.h | 4 +--- tensorflow/core/kernels/partitioned_function_ops.cc | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index 593f855ea2..01e5af5f8c 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -277,9 +277,7 @@ class MklCPUAllocator : public VisitableAllocator { // max_alloc_size from large_size_allocator would be the maximum // size allocated by MklCPUAllocator. 
stats->max_alloc_size = l_stats.max_alloc_size; - - stats->bytes_limit = - std::max(s_stats.bytes_limit, l_stats.bytes_limit); + stats->bytes_limit = std::max(s_stats.bytes_limit, l_stats.bytes_limit); } void ClearStats() override { diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index ddb621967a..42f99a73e6 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -100,8 +100,8 @@ class PartitionedCallOp : public AsyncOpKernel { // We need to pass global op_registry as default_registry when creating // graph. So that graph optimization passes can lookup all possible ops // by name. - FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), - fbody->graph->flib_def().ToProto()); + FunctionLibraryDefinition func_lib_def( + OpRegistry::Global(), fbody->graph->flib_def().ToProto()); auto graph = tensorflow::MakeUnique(func_lib_def); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -256,7 +256,7 @@ class PartitionedCallOp : public AsyncOpKernel { << partitions.size() << " shards."; FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), - graph->flib_def().ToProto()); + graph->flib_def().ToProto()); for (const auto& partition : partitions) { std::unique_ptr subgraph(new Graph(func_lib_def)); GraphConstructorOptions opts; -- GitLab From 6d67ba41f566e963e2c061ca7df63edad89e1fca Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 18 Sep 2018 18:56:55 +0300 Subject: [PATCH 049/570] Work out the endianness statically. 
--- tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 5b42de4c5a..484cc4d6f5 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -20,12 +20,12 @@ limitations under the License. namespace tensorflow { +constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; + class ByteSwapper { public: ByteSwapper(bool big_endian) { - int x = 1; - bool is_little_endian = (*(char *)&x == 1); - swap_ = big_endian == is_little_endian; + swap_ = big_endian == kLittleEndian; } inline void SwapIfRequiredInt16(int16_t *x) const { -- GitLab From 30f28a7f44f39cb8f24fde17252c3e2539c22bb0 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Tue, 18 Sep 2018 09:52:03 -0700 Subject: [PATCH 050/570] change per code style check --- tensorflow/core/util/mkl_util.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 5ea8f2ee47..387e5ee5a6 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#ifndef TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#define TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ #ifdef INTEL_MKL #include @@ -2040,8 +2040,8 @@ class MklPrimitiveFactory { /// Fuction to check whether primitive memory optimization is enabled static inline bool IsPrimitiveMemOptEnabled() { bool is_primitive_mem_opt_enabled = true; - TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", true, - &is_primitive_mem_opt_enabled)); + TF_CHECK_OK(ReadBoolFromEnvVar("TF_MKL_OPTIMIZE_PRIMITIVE_MEMUSE", + true, &is_primitive_mem_opt_enabled)); return is_primitive_mem_opt_enabled; } @@ -2098,7 +2098,7 @@ static inline memory::format get_desired_format(int channel, (channel % 8) == 0) { fmt_desired = is_2d ? memory::format::nChw8c - : memory::format::ncdhw; //not support avx2 for 3d yet. + : memory::format::ncdhw; // not support avx2 for 3d yet. } else { fmt_desired = is_2d ? 
memory::format::nchw : memory::format::ncdhw; } @@ -2210,7 +2210,8 @@ inline primitive FindOrCreateReorder(const memory* from, const memory* to) { // utility function to determine if it is conv 1x1 and stride != 1 // for purpose of temporarily disabling primitive reuse -inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides) { +inline bool IsConv1x1StrideNot1(memory::dims filter_dims, + memory::dims strides) { if (filter_dims.size() != 4 || strides.size() != 2) return false; return ((filter_dims[2] == 1) && (filter_dims[3] == 1) && @@ -2221,4 +2222,4 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, memory::dims strides) } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#endif // TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -- GitLab From 14e9345a88b08f5d2a12f3f441b1d82c041d7ea3 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Tue, 18 Sep 2018 18:23:52 +0000 Subject: [PATCH 051/570] Avoid saving sensitive information in graph. 
--- .../ignite/kernels/ignite_dataset_ops.cc | 30 ++------- tensorflow/contrib/ignite/ops/dataset_ops.cc | 10 --- .../ignite/python/ops/ignite_dataset_ops.py | 18 +---- .../python/tests/ignite_dataset_test.py | 66 ++++++++++++++----- 4 files changed, 56 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc index e48fce4ed2..bdaed72387 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc +++ b/tensorflow/contrib/ignite/kernels/ignite_dataset_ops.cc @@ -125,35 +125,15 @@ class IgniteDatasetOp : public DatasetOpKernel { OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "page_size", &page_size)); - if (env_username) - username = string(env_username); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "username", &username)); + if (env_username) username = string(env_username); - if (env_password) - password = string(env_password); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "password", &password)); + if (env_password) password = string(env_password); - if (env_certfile) - certfile = string(env_certfile); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "certfile", &certfile)); + if (env_certfile) certfile = string(env_certfile); - if (env_keyfile) - keyfile = string(env_keyfile); - else - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "keyfile", &keyfile)); + if (env_keyfile) keyfile = string(env_keyfile); - if (env_cert_password) - cert_password = string(env_cert_password); - else - OP_REQUIRES_OK(ctx, ParseScalarArgument(ctx, "cert_password", - &cert_password)); + if (env_cert_password) cert_password = string(env_cert_password); const Tensor* schema_tensor; OP_REQUIRES_OK(ctx, ctx->input("schema", &schema_tensor)); diff --git a/tensorflow/contrib/ignite/ops/dataset_ops.cc b/tensorflow/contrib/ignite/ops/dataset_ops.cc index 7d18df11aa..3d6fbe00e6 100644 --- a/tensorflow/contrib/ignite/ops/dataset_ops.cc +++ 
b/tensorflow/contrib/ignite/ops/dataset_ops.cc @@ -26,11 +26,6 @@ REGISTER_OP("IgniteDataset") .Input("local: bool") .Input("part: int32") .Input("page_size: int32") - .Input("username: string") - .Input("password: string") - .Input("certfile: string") - .Input("keyfile: string") - .Input("cert_password: string") .Input("schema: int32") .Input("permutation: int32") .Output("handle: variant") @@ -54,11 +49,6 @@ port: Ignite Thin Client Port. local: Local flag that defines that data should be fetched from local host only. part: Partition data should be fetched from. page_size: Page size for Ignite Thin Client. -username: Username to authenticate via Ignite Thin Client. -password: Password to authenticate via Ignite Thin Client. -certfile: SSL certificate to establish SSL connection. -keyfile: Private key file to establish SSL connection. -cert_password: SSL certificate password to establish SSL connection. schema: Internal structure that defines schema of cache objects. permutation: Internal structure that defines permutation of cache objects. 
)doc"); diff --git a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py index c0e24b1c69..7fc9e1fdd1 100644 --- a/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py +++ b/tensorflow/contrib/ignite/python/ops/ignite_dataset_ops.py @@ -732,18 +732,6 @@ class IgniteDataset(Dataset): self.part = ops.convert_to_tensor(part, dtype=dtypes.int32, name="part") self.page_size = ops.convert_to_tensor(page_size, dtype=dtypes.int32, name="page_size") - self.username = ops.convert_to_tensor("" if username is None else username, - dtype=dtypes.string, name="username") - self.password = ops.convert_to_tensor("" if password is None else password, - dtype=dtypes.string, name="password") - self.certfile = ops.convert_to_tensor("" if certfile is None else certfile, - dtype=dtypes.string, name="certfile") - self.keyfile = ops.convert_to_tensor("" if keyfile is None else keyfile, - dtype=dtypes.string, name="keyfile") - self.cert_password = ops.convert_to_tensor("" if cert_password is None - else cert_password, - dtype=dtypes.string, - name="cert_password") self.schema = ops.convert_to_tensor(self.cache_type.to_flat(), dtype=dtypes.int32, name="schema") self.permutation = ops.convert_to_tensor(self.cache_type.to_permutation(), @@ -753,10 +741,8 @@ class IgniteDataset(Dataset): def _as_variant_tensor(self): return gen_dataset_ops.ignite_dataset(self.cache_name, self.host, self.port, self.local, self.part, - self.page_size, self.username, - self.password, self.certfile, - self.keyfile, self.cert_password, - self.schema, self.permutation) + self.page_size, self.schema, + self.permutation) @property def output_classes(self): diff --git a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py index 933e62b804..5d74617690 100644 --- a/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py +++ 
b/tensorflow/contrib/ignite/python/tests/ignite_dataset_test.py @@ -35,28 +35,60 @@ class IgniteDatasetTest(test.TestCase): """ def test_ignite_dataset_with_plain_client(self): + """Test Ignite Dataset with plain client. + """ + self._clear_env() ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42300) - self.__check_dataset(ds) + self._check_dataset(ds) def test_ignite_dataset_with_ssl_client(self): - ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301,\ - certfile=os.path.dirname(os.path.realpath(__file__)) +\ - "/keystore/client.pem", cert_password="123456") - self.__check_dataset(ds) + """Test Ignite Dataset with ssl client. + """ + self._clear_env() + os.environ["IGNITE_DATASET_CERTFILE"] = os.path.dirname( + os.path.realpath(__file__)) + "/keystore/client.pem" + os.environ["IGNITE_DATASET_CERT_PASSWORD"] = "123456" + + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42301, + certfile=os.environ["IGNITE_DATASET_CERTFILE"], + cert_password=os.environ["IGNITE_DATASET_CERT_PASSWORD"]) + self._check_dataset(ds) def test_ignite_dataset_with_ssl_client_and_auth(self): - ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302,\ - certfile=os.path.dirname(os.path.realpath(__file__)) +\ - "/keystore/client.pem", cert_password="123456",\ - username="ignite", password="ignite") - self.__check_dataset(ds) + """Test Ignite Dataset with ssl client and authentication. 
+ """ + self._clear_env() + os.environ['IGNITE_DATASET_USERNAME'] = "ignite" + os.environ['IGNITE_DATASET_PASSWORD'] = "ignite" + os.environ['IGNITE_DATASET_CERTFILE'] = os.path.dirname( + os.path.realpath(__file__)) + "/keystore/client.pem" + os.environ['IGNITE_DATASET_CERT_PASSWORD'] = "123456" + + ds = IgniteDataset(cache_name="SQL_PUBLIC_TEST_CACHE", port=42302, + certfile=os.environ['IGNITE_DATASET_CERTFILE'], + cert_password=os.environ['IGNITE_DATASET_CERT_PASSWORD'], + username=os.environ['IGNITE_DATASET_USERNAME'], + password=os.environ['IGNITE_DATASET_PASSWORD']) + self._check_dataset(ds) + + def _clear_env(self): + """Clears environment variables used by Ignite Dataset. + """ + if 'IGNITE_DATASET_USERNAME' in os.environ: + del os.environ['IGNITE_DATASET_USERNAME'] + if 'IGNITE_DATASET_PASSWORD' in os.environ: + del os.environ['IGNITE_DATASET_PASSWORD'] + if 'IGNITE_DATASET_CERTFILE' in os.environ: + del os.environ['IGNITE_DATASET_CERTFILE'] + if 'IGNITE_DATASET_CERT_PASSWORD' in os.environ: + del os.environ['IGNITE_DATASET_CERT_PASSWORD'] - def __check_dataset(self, dataset): + def _check_dataset(self, dataset): """Checks that dataset provids correct data. 
""" - self.assertEquals(tf.int64, dataset.output_types['key']) - self.assertEquals(tf.string, dataset.output_types['val']['NAME']) - self.assertEquals(tf.int64, dataset.output_types['val']['VAL']) + self.assertEqual(tf.int64, dataset.output_types['key']) + self.assertEqual(tf.string, dataset.output_types['val']['NAME']) + self.assertEqual(tf.int64, dataset.output_types['val']['VAL']) it = dataset.make_one_shot_iterator() ne = it.get_next() @@ -66,11 +98,11 @@ class IgniteDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(ne) - self.assertEquals({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ + self.assertEqual({'key': 1, 'val': {'NAME': b'TEST1', 'VAL': 42}},\ rows[0]) - self.assertEquals({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ + self.assertEqual({'key': 2, 'val': {'NAME': b'TEST2', 'VAL': 43}},\ rows[1]) - self.assertEquals({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ + self.assertEqual({'key': 3, 'val': {'NAME': b'TEST3', 'VAL': 44}},\ rows[2]) if __name__ == "__main__": -- GitLab From 1e821cd9a02b59a90a8b983759cf74eded16265f Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Wed, 19 Sep 2018 11:06:40 -0700 Subject: [PATCH 052/570] Fix bug in metrics sparse_categorical_accuracy and sparse_top_k_categorical_accuracy --- tensorflow/python/keras/metrics.py | 15 ++++++++------ tensorflow/python/keras/metrics_test.py | 26 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/keras/metrics.py b/tensorflow/python/keras/metrics.py index e64241e5cf..2fd3244800 100644 --- a/tensorflow/python/keras/metrics.py +++ b/tensorflow/python/keras/metrics.py @@ -635,7 +635,9 @@ def categorical_accuracy(y_true, y_pred): @tf_export('keras.metrics.sparse_categorical_accuracy') def sparse_categorical_accuracy(y_true, y_pred): - y_true = math_ops.reduce_max(y_true, axis=-1) + # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,) + if (len(K.int_shape(y_true)) == 
len(K.int_shape(y_pred))): + y_true = array_ops.squeeze(y_true, [-1]) y_pred = math_ops.argmax(y_pred, axis=-1) # If the expected labels are float, we need to cast the int returned by @@ -654,11 +656,12 @@ def top_k_categorical_accuracy(y_true, y_pred, k=5): @tf_export('keras.metrics.sparse_top_k_categorical_accuracy') def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): - return K.mean( - nn.in_top_k(y_pred, - math_ops.cast(math_ops.reduce_max(y_true, axis=-1), 'int32'), - k), - axis=-1) + # If the shape of y_true is (num_samples, 1), squeeze to (num_samples,) + if (len(K.int_shape(y_true)) == len(K.int_shape(y_pred))): + y_true = array_ops.squeeze(y_true, [-1]) + + return K.mean(nn.in_top_k(y_pred, math_ops.cast(y_true, 'int32'), k), + axis=-1) # Aliases diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py index 4195ea18ad..43ac5b7ead 100644 --- a/tensorflow/python/keras/metrics_test.py +++ b/tensorflow/python/keras/metrics_test.py @@ -54,6 +54,18 @@ class KerasMetricsTest(test.TestCase): y_pred = K.variable(np.random.random((6, 7))) self.assertEqual(K.eval(metric(y_true, y_pred)).shape, (6,)) + # Test correctness if the shape of y_true is (num_samples,) + y_true = K.variable([1., 0., 0., 0.]) + y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) + print(K.eval(metric(y_true, y_pred))) + self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.]) + + # Test correctness if the shape of y_true is (num_samples, 1) + y_true = K.variable([[1.], [0.], [0.], [0.]]) + y_pred = K.variable([[0.8, 0.2], [0.6, 0.4], [0.7, 0.3], [0.9, 0.1]]) + print(K.eval(metric(y_true, y_pred))) + self.assertAllEqual(K.eval(metric(y_true, y_pred)), [0., 1., 1., 1.]) + def test_sparse_categorical_accuracy_float(self): with self.cached_session(): metric = metrics.sparse_categorical_accuracy @@ -79,6 +91,7 @@ class KerasMetricsTest(test.TestCase): def test_sparse_top_k_categorical_accuracy(self): with 
self.cached_session(): + # Test correctness if the shape of y_true is (num_samples, 1) y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) y_true = K.variable(np.array([[1], [0]])) result = K.eval( @@ -91,6 +104,19 @@ class KerasMetricsTest(test.TestCase): metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) self.assertEqual(result, 0.) + # Test correctness if the shape of y_true is (num_samples,) + y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) + y_true = K.variable(np.array([1, 0])) + result = K.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) + self.assertEqual(result, 1) + result = K.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) + self.assertEqual(result, 0.5) + result = K.eval( + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) + self.assertEqual(result, 0.) + def test_top_k_categorical_accuracy(self): with self.cached_session(): y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) -- GitLab From 78e205d35b31aa49e8dac357d827900a165f0a21 Mon Sep 17 00:00:00 2001 From: Erik Smistad Date: Thu, 20 Sep 2018 15:56:34 +0200 Subject: [PATCH 053/570] Added warning message if cmake version is below 3.8 or host toolset is not set to x64 on windows --- tensorflow/contrib/cmake/CMakeLists.txt | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 225c5e6227..a7a66472df 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -1,8 +1,14 @@ # Minimum CMake required +cmake_minimum_required(VERSION 3.5) + if(WIN32) - cmake_minimum_required(VERSION 3.8) -else() - cmake_minimum_required(VERSION 3.5) + if(${CMAKE_VERSION} VERSION_LESS "3.8") + message(WARNING "Your current cmake version is ${CMAKE_VERSION} which does not support setting the toolset architecture to x64. 
This may cause \"compiler out of heap space\" errors when building. Consider upgrading your cmake to > 3.8 and using the flag -Thost=x64 when running cmake.") + else() + if(NOT CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE OR NOT "${CMAKE_VS_PLATFORM_TOOLSET_HOST_ARCHITECTURE}" STREQUAL "x64") + message(WARNING "Your current cmake generator is set to use 32 bit toolset architecture. This may cause \"compiler out of heap space\" errors when building. Consider using the flag -Thost=x64 when running cmake.") + endif() + endif() endif() # Project -- GitLab From dcd63fab37f686a069b54a7653254bbb15a2bf20 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Thu, 20 Sep 2018 11:04:25 -0700 Subject: [PATCH 054/570] Fix for failing eager:function_test --- tensorflow/core/kernels/partitioned_function_ops.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 42f99a73e6..7a5a2ff8fa 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -100,9 +100,9 @@ class PartitionedCallOp : public AsyncOpKernel { // We need to pass global op_registry as default_registry when creating // graph. So that graph optimization passes can lookup all possible ops // by name. 
- FunctionLibraryDefinition func_lib_def( - OpRegistry::Global(), fbody->graph->flib_def().ToProto()); - auto graph = tensorflow::MakeUnique(func_lib_def); + auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); + FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); + graph.get()->AddFunctionLibrary(global_flib.ToProto()); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -255,10 +255,10 @@ class PartitionedCallOp : public AsyncOpKernel { VLOG(3) << "Partitioned function '" << func_.name() << "', yielding " << partitions.size() << " shards."; - FunctionLibraryDefinition func_lib_def(OpRegistry::Global(), - graph->flib_def().ToProto()); for (const auto& partition : partitions) { - std::unique_ptr subgraph(new Graph(func_lib_def)); + std::unique_ptr subgraph(new Graph(graph->flib_def())); + FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); + subgraph.get()->AddFunctionLibrary(global_flib.ToProto()); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = true; -- GitLab From 039ddaa6c0af4be4291383564db5a964d0035c1d Mon Sep 17 00:00:00 2001 From: Yanbo Liang Date: Thu, 20 Sep 2018 15:49:40 -0700 Subject: [PATCH 055/570] Fix bad indentation --- tensorflow/python/keras/metrics_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/keras/metrics_test.py b/tensorflow/python/keras/metrics_test.py index 43ac5b7ead..5f5565d4d5 100644 --- a/tensorflow/python/keras/metrics_test.py +++ b/tensorflow/python/keras/metrics_test.py @@ -108,13 +108,13 @@ class KerasMetricsTest(test.TestCase): y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) y_true = K.variable(np.array([1, 0])) result = K.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) self.assertEqual(result, 1) result = K.eval( - 
metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) self.assertEqual(result, 0.5) result = K.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) + metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) self.assertEqual(result, 0.) def test_top_k_categorical_accuracy(self): -- GitLab From 16a257eb598b7dfd220249babf8d18c984aab103 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 21 Sep 2018 09:43:22 -0700 Subject: [PATCH 056/570] change back MICRA def - coding styling --- tensorflow/core/util/mkl_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index 387e5ee5a6..f371fd6f95 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -#define TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#ifndef TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#define TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ #ifdef INTEL_MKL #include @@ -2222,4 +2222,4 @@ inline bool IsConv1x1StrideNot1(memory::dims filter_dims, } // namespace tensorflow #endif // INTEL_MKL -#endif // TENSORFLOW_TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ +#endif // TENSORFLOW_CORE_UTIL_MKL_UTIL_H_ -- GitLab From 59a47b7d330a40971bad89f0e8aa282e79e889f1 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Fri, 21 Sep 2018 09:56:29 -0700 Subject: [PATCH 057/570] refine a comment per Tatiana's suggestions --- tensorflow/core/util/mkl_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/util/mkl_util.h b/tensorflow/core/util/mkl_util.h index f371fd6f95..2f2705de92 100644 --- a/tensorflow/core/util/mkl_util.h +++ b/tensorflow/core/util/mkl_util.h 
@@ -2098,7 +2098,7 @@ static inline memory::format get_desired_format(int channel, (channel % 8) == 0) { fmt_desired = is_2d ? memory::format::nChw8c - : memory::format::ncdhw; // not support avx2 for 3d yet. + : memory::format::ncdhw; // no avx2 support for 3d yet. } else { fmt_desired = is_2d ? memory::format::nchw : memory::format::ncdhw; } -- GitLab From 268bf6b118646c8e93162d591263bca907c7db28 Mon Sep 17 00:00:00 2001 From: AG Ramesh Date: Fri, 21 Sep 2018 11:39:29 -0700 Subject: [PATCH 058/570] Removing dead code. With the addition of mkl slice using MKL DNN this code will not longer be executed --- tensorflow/core/kernels/slice_op.cc | 198 ---------------------------- 1 file changed, 198 deletions(-) diff --git a/tensorflow/core/kernels/slice_op.cc b/tensorflow/core/kernels/slice_op.cc index 77594479cb..83377ffab5 100644 --- a/tensorflow/core/kernels/slice_op.cc +++ b/tensorflow/core/kernels/slice_op.cc @@ -228,190 +228,6 @@ class SliceOp : public OpKernel { } }; -#ifdef INTEL_MKL -template -class MklSliceOp : public OpKernel { - public: - explicit MklSliceOp(OpKernelConstruction* context) : OpKernel(context) {} - - void Compute(OpKernelContext* context) override { - TensorShape output_shape; - gtl::InlinedVector begin; - gtl::InlinedVector size; - Tensor* result = nullptr; - bool done = false; - SharedSliceCommonCases(context, &output_shape, &begin, &size, &result, - &done); - if (!context->status().ok() || done == true) return; - - const Tensor& input = context->input(0); - const int input_dims = input.dims(); - - if (output_shape.num_elements() > 0) { - if (std::is_same::value && input_dims == 2 && - DataTypeCanUseMemcpy(DataTypeToEnum::v())) { - auto input = context->input(0).tensor(); - auto output = result->tensor(); - // TODO(agarwal): Consider multi-threading this loop for cases where - // size[0] is very large. 
- for (int i = 0; i < size[0]; ++i) { - const int64 row = begin[0] + i; - if (i + 1 < size[0]) { - port::prefetch(&output(i + 1, 0)); - port::prefetch(&input(row + 1, begin[1])); - } - memcpy(&output(i, 0), &input(row, begin[1]), size[1] * sizeof(T)); - } - return; - } -#define HANDLE_DIM(NDIM) \ - if (input_dims == NDIM) { \ - HandleCase(context, begin, size, result); \ - return; \ - } - - HANDLE_DIM(1); - HANDLE_DIM(2); - HANDLE_DIM(3); - HANDLE_DIM(4); - HANDLE_DIM(5); - HANDLE_DIM(6); - HANDLE_DIM(7); - -#undef HANDLE_DIM - - OP_REQUIRES( - context, false, - errors::Unimplemented("SliceOp : Unhandled input dimensions")); - } - } - - private: - // Helper function for DoesSliceShapeDifferInOnly1D. Checks if the following - // criteria matches for slice_dim: if indices for slice are 0 in all dims - // except slice_dim and if sizes of all the dimensions of the slice are same - // as the sizes of all the dimensions of the input except slice_dim, then - // returns True. Otherwise, returns False. - bool DoesSliceShapeDifferInOnly1DHelper(const TensorShape& input_shape, - const gtl::ArraySlice& begin, - const gtl::ArraySlice& size, - int slice_dim) { - for (int dim = 0; dim < 4; dim++) { - if (dim != slice_dim && - (begin[dim] != 0 || size[dim] != input_shape.dim_size(dim))) { - return false; - } - } - return true; - } - - // Is 'input' tensor being sliced over a single dimension out of 4? - // - // This check is applicable in the context of Slice of a 4-D tensor in - // NHWC or NCHW format over channel dimension. - // - // If indices for slice are 0 in all dims except one dimension and if sizes of - // all dimensions of slice are same as sizes of all dimensions of inputs - // except that dimension, then we are slicing over a single dimension. - // - // Returns True if Slicing over a single dimension, and sets slice_dim - // to the number of the dimension that satisfies criteria. 
- bool DoesSliceShapeDifferInOnly1D(const TensorShape& input_shape, - const gtl::ArraySlice& begin, - const gtl::ArraySlice& size, - int* slice_dim) { - for (int dim = 0; dim < 4; dim++) { - if (DoesSliceShapeDifferInOnly1DHelper(input_shape, begin, size, dim)) { - *slice_dim = dim; - return true; - } - } - return false; - } - - template - void HandleCase(OpKernelContext* context, const gtl::ArraySlice& begin, - const gtl::ArraySlice& size, Tensor* result) { - int slice_dim = -1; - TensorShape in_shape = context->input(0).shape(); - // Special case for handling 4-D tensor slice when shape of the slice - // differs from the input tensor in only 1 out of 4 dimensions. - // This case arises in the context of Slice of 4-D tensor in NHWC or NCHW - // format over channel dimension. - if (NDIM == 4 && - DoesSliceShapeDifferInOnly1D(in_shape, begin, size, &slice_dim)) { - size_t in_strides[4] = { - (size_t)in_shape.dim_size(1) * in_shape.dim_size(2) * - in_shape.dim_size(3), - (size_t)in_shape.dim_size(2) * in_shape.dim_size(3), - (size_t)in_shape.dim_size(3), (size_t)1}; - - size_t out_strides[4] = {(size_t)size[1] * size[2] * size[3], - (size_t)size[2] * size[3], (size_t)size[3], - (size_t)1}; - - T* in_buf = const_cast( - const_cast(context->input(0).flat().data())); - T* op_buf = result->flat().data(); - - if (slice_dim == 1) { - /* data format = NCHW */ - -#pragma omp parallel for - for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { - T* ip = in_buf + (d0 * in_strides[0]); - T* op = op_buf + ((d0 - begin[0]) * out_strides[0]); -#pragma omp parallel for - for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { - T* ip1 = ip + (d1 * in_strides[1]); - T* op1 = op + ((d1 - begin[1]) * out_strides[1]); - // For NCHW, H and W will be contiguous. So we can copy - // both with one memcpy. 
- memcpy(static_cast(op1), static_cast(ip1), - sizeof(T) * in_strides[1]); - } - } - return; - } else if (slice_dim == 3) { - /* data_format = NHWC */ - -#pragma omp parallel for - for (ssize_t d0 = begin[0]; d0 < begin[0] + size[0]; d0++) { - T* ip = in_buf + (d0 * in_strides[0]); - T* op = op_buf + ((d0 - begin[0]) * out_strides[0]); -#pragma omp parallel for - for (ssize_t d1 = begin[1]; d1 < begin[1] + size[1]; d1++) { - T* ip1 = ip + (d1 * in_strides[1]); - T* op1 = op + ((d1 - begin[1]) * out_strides[1]); -#pragma omp parallel for - for (ssize_t d2 = begin[2]; d2 < begin[2] + size[2]; d2++) { - T* ip2 = ip1 + (d2 * in_strides[2]); - T* ip3 = ip2 + begin[3]; - T* op2 = op1 + ((d2 - begin[2]) * out_strides[2]); - T* op3 = op2; - memcpy(static_cast(op3), static_cast(ip3), - sizeof(T) * size[3]); - } - } - } - return; - } - // slice_dim is not 1 or 3, then we fallback to Eigen implementation. - } - - Eigen::DSizes indices; - Eigen::DSizes sizes; - for (int i = 0; i < NDIM; ++i) { - indices[i] = begin[i]; - sizes[i] = size[i]; - } - - functor::Slice()( - context->eigen_device(), result->tensor(), - context->input(0).tensor(), indices, sizes); - } -}; -#endif // Forward declarations of the functor specializations for declared in the // sharded source files. 
@@ -440,7 +256,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N); #undef DECLARE_CPU_SPEC } // namespace functor -#ifndef INTEL_MKL #define REGISTER_SLICE(type) \ REGISTER_KERNEL_BUILDER(Name("Slice") \ .Device(DEVICE_CPU) \ @@ -452,19 +267,6 @@ TF_CALL_ALL_TYPES(DECLARE_FOR_N); TF_CALL_POD_STRING_TYPES(REGISTER_SLICE); TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); #undef REGISTER_SLICE -#else -#define REGISTER_SLICE(type) \ - REGISTER_KERNEL_BUILDER(Name("Slice") \ - .Device(DEVICE_CPU) \ - .TypeConstraint("T") \ - .HostMemory("begin") \ - .HostMemory("size"), \ - MklSliceOp) - -TF_CALL_POD_STRING_TYPES(REGISTER_SLICE); -TF_CALL_QUANTIZED_TYPES(REGISTER_SLICE); -#undef REGISTER_SLICE -#endif // INTEL_MKL #if GOOGLE_CUDA // Forward declarations of the functor specializations for GPU. -- GitLab From 457ef66c2d4985000aa1d1a9bc643f66bbddd46d Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Fri, 21 Sep 2018 12:58:32 -0700 Subject: [PATCH 059/570] Fix long lines --- tensorflow/python/keras/layers/embeddings.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/layers/embeddings.py b/tensorflow/python/keras/layers/embeddings.py index a0b9393812..76e551a7ce 100644 --- a/tensorflow/python/keras/layers/embeddings.py +++ b/tensorflow/python/keras/layers/embeddings.py @@ -142,12 +142,14 @@ class Embedding(Layer): else: in_lens = [self.input_length] if len(in_lens) != len(input_shape) - 1: - raise ValueError('"input_length" is %s, but received input has shape %s' % + raise ValueError('"input_length" is %s, ' + 'but received input has shape %s' % (str(self.input_length), str(input_shape))) else: for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): if s1 is not None and s2 is not None and s1 != s2: - raise ValueError('"input_length" is %s, but received input has shape %s' % + raise ValueError('"input_length" is %s, ' + 'but received input has shape %s' % (str(self.input_length), str(input_shape))) 
elif s1 is None: in_lens[i] = s2 -- GitLab From 282d6e7c384c83f9b6bf43b7b37eb606ccc64d06 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Fri, 21 Sep 2018 12:59:15 -0700 Subject: [PATCH 060/570] Fix long lines --- tensorflow/python/ops/nn_ops.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 17e10995f2..a68422c315 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -818,12 +818,14 @@ class Convolution(object): try: input_shape.with_rank(num_spatial_dims + 2) except ValueError: - raise ValueError("input tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("input tensor must have rank %d" % + (num_spatial_dims + 2)) try: filter_shape.with_rank(num_spatial_dims + 2) except ValueError: - raise ValueError("filter tensor must have rank %d" % (num_spatial_dims + 2)) + raise ValueError("filter tensor must have rank %d" % + (num_spatial_dims + 2)) if data_format is None or not data_format.startswith("NC"): input_channels_dim = input_shape[num_spatial_dims + 1] -- GitLab From 6dd7a09211cc74d11ff1554624b527c432020cbc Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Sun, 23 Sep 2018 20:33:19 +0800 Subject: [PATCH 061/570] Enable partitioned variable assignments --- .../python/kernel_tests/variables_test.py | 43 ++++++++++++++++- tensorflow/python/ops/variables.py | 47 +++++++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 2e7975667c..687784c8b7 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase): v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( - 
variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) + variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1])) partitions = [2] variables.PartitionedVariable( @@ -696,6 +696,47 @@ class PartitionedVariableTest(test.TestCase): variable_list=[v0], partitions=partitions) + def testPartitionedVariableAssignments(self): + with ops.Graph().as_default(), self.cached_session() as sess: + v0 = variables.Variable(initial_value=[0.0]) + v1 = variables.Variable(initial_value=[1.0]) + v0._set_save_slice_info( + variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) + v1._set_save_slice_info( + variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) + partitions = [2] + + # Pass variable_list as [v1, v0] to ensure they are properly + # re-sorted to [v0, v1] based on their slice info offsets. + partitioned_variable = variables.PartitionedVariable( + name="two_vars", + shape=[2], + dtype=v0.dtype, + variable_list=[v0, v1], + partitions=partitions) + + deltas_a = constant_op.constant([1.0, 2.0]) + deltas_b = constant_op.constant([3.0, 4.0]) + ones = array_ops.ones([2]) + plus_delta = partitioned_variable.assign_add(deltas_a) + minus_delta = partitioned_variable.assign_sub(deltas_b) + assign_ones = partitioned_variable.assign(ones) + variables.global_variables_initializer().run() + + self.assertEqual([1.0], plus_delta[0].eval()) + self.assertEqual([1.0], v0.eval()) + self.assertEqual([3.0], plus_delta[1].eval()) + self.assertEqual([3.0], v1.eval()) + + self.assertEqual([-2.0], minus_delta[0].eval()) + self.assertEqual([-2.0], v0.eval()) + self.assertEqual([-1.0], minus_delta[1].eval()) + self.assertEqual([-1.0], v1.eval()) + + self.assertEqual([1.0], assign_ones[0].eval()) + self.assertEqual([1.0], v0.eval()) + self.assertEqual([1.0], assign_ones[1].eval()) + self.assertEqual([1.0], v1.eval()) class VariableContainerTest(test.TestCase): diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 7a46157739..2d6a767fed 100644 --- 
a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -2395,11 +2395,50 @@ class PartitionedVariable(object): def _get_partitions(self): return self._partitions - def assign(self, value, use_locking=False): - _ = value, use_locking - raise NotImplementedError( - "assign() has not been implemented for PartitionedVariable.") + def _apply_assign_fn(self, + assign_fn, + value): + partition_axes = self._partition_axes() + if len(partition_axes) > 1: + raise NotImplementedError( + "Cannot concatenate along more than one dimension: %s. " + "Multi-axis partition assign_fn is not supported" % str(partition_axes)) + partition_ix = partition_axes[0] + size_splits_list = [ + var.shape[partition_ix].value for var in self._variable_list] + value_list = array_ops.split( + value, size_splits_list, axis=partition_ix) + op_list = [ + assign_fn(var, value_list[idx], idx) \ + for idx, var in enumerate(self._variable_list)] + return op_list + def assign(self, value, use_locking=False, name=None, read_value=True): + assign_fn = lambda var, r_value, idx: var.assign( + r_value, use_locking=use_locking, + name="%s_%d" % (name, idx), read_value=read_value) + assign_list = self._apply_assign_fn(assign_fn, value) + if read_value: + return assign_list + return [assign.op for assign in assign_list] + + def assign_add(self, value, use_locking=False, name=None, read_value=True): + assign_fn = lambda var, r_value, idx: var.assign_add( + r_value, use_locking=use_locking, + name="%s_%d" % (name, idx), read_value=read_value) + assign_list = self._apply_assign_fn(assign_fn, value) + if read_value: + return assign_list + return [assign.op for assign in assign_list] + + def assign_sub(self, value, use_locking=False, name=None, read_value=True): + assign_fn = lambda var, r_value, idx: var.assign_sub( + r_value, use_locking=use_locking, + name="%s_%d" % (name, idx), read_value=read_value) + assign_list = self._apply_assign_fn(assign_fn, value) + if read_value: + return 
assign_list + return [assign.op for assign in assign_list] @tf_export("global_variables") def global_variables(scope=None): -- GitLab From a4eecdb369ecdae3b7fe7c1415d7b3b55bcc7b9e Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sun, 23 Sep 2018 17:14:53 +0000 Subject: [PATCH 062/570] Fix GPU build issue on python 3 Signed-off-by: Yong Tang --- tensorflow/contrib/image/kernels/image_ops.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h index 6b63eed130..7fac774d07 100644 --- a/tensorflow/contrib/image/kernels/image_ops.h +++ b/tensorflow/contrib/image/kernels/image_ops.h @@ -71,14 +71,7 @@ class ProjectiveGenerator { (transform[3] * output_x + transform[4] * output_y + transform[5]) / projection; - // TODO(ringwalt): Add a fill value input. -#if (defined __CUDA_ARCH__) && (CUDART_VERSION < 8000) - // On CUDA versions previous to 8.0, only __shared__ variables - // could be declared as static in the device code. const T fill_value = T(0); -#else - static const T fill_value = T(0); -#endif switch (interpolation_) { case INTERPOLATION_NEAREST: // Switch the order of x and y again for indexing into the image. -- GitLab From 8f4ded5884684f40b4912d95c717b185340996b8 Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Mon, 24 Sep 2018 11:07:21 +0300 Subject: [PATCH 063/570] Fix clang styles. 
--- tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 484cc4d6f5..6753c67701 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -24,9 +24,7 @@ constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; class ByteSwapper { public: - ByteSwapper(bool big_endian) { - swap_ = big_endian == kLittleEndian; - } + ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; } inline void SwapIfRequiredInt16(int16_t *x) const { if (swap_) { -- GitLab From 90c68770467701a23d23a85c5d769f6f4fa39f0f Mon Sep 17 00:00:00 2001 From: Anton Dmitriev Date: Mon, 24 Sep 2018 12:14:45 +0300 Subject: [PATCH 064/570] Fix byte-order issue. --- tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h index 6753c67701..46df3e39dc 100644 --- a/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h +++ b/tensorflow/contrib/ignite/kernels/ignite_byte_swapper.h @@ -17,14 +17,13 @@ limitations under the License. 
#define TENSORFLOW_CONTRIB_IGNITE_KERNELS_IGNITE_BYTE_SWAPPER_H_ #include +#include "tensorflow/core/platform/byte_order.h" namespace tensorflow { -constexpr bool kLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__; - class ByteSwapper { public: - ByteSwapper(bool big_endian) { swap_ = big_endian == kLittleEndian; } + ByteSwapper(bool big_endian) { swap_ = big_endian == port::kLittleEndian; } inline void SwapIfRequiredInt16(int16_t *x) const { if (swap_) { -- GitLab From f0886f7269de900d226455d4831722f6fc94a71b Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Tue, 25 Sep 2018 09:59:17 +0800 Subject: [PATCH 065/570] Fix build dependencies in tensorflow/cc/BUILD. --- tensorflow/cc/BUILD | 1 + tensorflow/python/kernel_tests/relu_op_test.py | 4 ++-- tensorflow/python/ops/nn_ops.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index f56521dac0..e99d15f85d 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -410,6 +410,7 @@ tf_cc_test( srcs = ["gradients/nn_grad_test.cc"], deps = [ ":cc_ops", + ":cc_ops_internal", ":grad_op_registry", ":grad_testutil", ":gradient_checker", diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 86d9c90e83..d97a1613b9 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -351,7 +351,7 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-10) def testGradGradFloat32(self): - with compat.forward_compatibility_horizon(2018, 10, 2): + with compat.forward_compatibility_horizon(2018, 11, 2): with self.test_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], @@ -369,7 +369,7 @@ class LeakyReluTest(test.TestCase): self.assertLess(err, 1e-4) def testGradGradFloat64(self): - with compat.forward_compatibility_horizon(2018, 10, 2): + with compat.forward_compatibility_horizon(2018, 11, 2): with 
self.test_session(): x = constant_op.constant( [-0.9, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9], diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index d646245ce3..2861f40586 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1601,7 +1601,7 @@ def leaky_relu(features, alpha=0.2, name=None): features = ops.convert_to_tensor(features, name="features") if features.dtype.is_integer: features = math_ops.to_float(features) - if compat.forward_compatible(2018, 10, 1): + if compat.forward_compatible(2018, 11, 1): return gen_nn_ops.leaky_relu(features, alpha=alpha, name=name) alpha = ops.convert_to_tensor(alpha, dtype=features.dtype, name="alpha") return math_ops.maximum(alpha * features, features, name=name) -- GitLab From c12a90e45c5f94b80289f4278f81be4a0348fa19 Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Tue, 25 Sep 2018 13:51:36 +0800 Subject: [PATCH 066/570] fix pylint --- tensorflow/python/ops/variables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 2d6a767fed..d058478d58 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -2402,7 +2402,8 @@ class PartitionedVariable(object): if len(partition_axes) > 1: raise NotImplementedError( "Cannot concatenate along more than one dimension: %s. 
" - "Multi-axis partition assign_fn is not supported" % str(partition_axes)) + "Multi-axis partition assign_fn is not supported " + % str(partition_axes)) partition_ix = partition_axes[0] size_splits_list = [ var.shape[partition_ix].value for var in self._variable_list] -- GitLab From 3d60d636de59449a8448cbcbcd71af82e2871538 Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Tue, 25 Sep 2018 13:53:36 +0800 Subject: [PATCH 067/570] fix back variabe name --- tensorflow/python/kernel_tests/variables_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 687784c8b7..0b101529fe 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -673,7 +673,7 @@ class PartitionedVariableTest(test.TestCase): v0._set_save_slice_info( variables.Variable.SaveSliceInfo(v0.name, [2], [0], [1])) v1._set_save_slice_info( - variables.Variable.SaveSliceInfo(v1.name, [2], [1], [1])) + variables.Variable.SaveSliceInfo(v0.name, [2], [1], [1])) partitions = [2] variables.PartitionedVariable( -- GitLab From 21d4e8bb30a1753a81edd4912881d95b47ae3d1c Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Tue, 25 Sep 2018 15:50:10 +0800 Subject: [PATCH 068/570] remove warning lines --- tensorflow/python/ops/variables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index d058478d58..69f63bc8e6 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -2401,7 +2401,6 @@ class PartitionedVariable(object): partition_axes = self._partition_axes() if len(partition_axes) > 1: raise NotImplementedError( - "Cannot concatenate along more than one dimension: %s. 
" "Multi-axis partition assign_fn is not supported " % str(partition_axes)) partition_ix = partition_axes[0] -- GitLab From 937ad7c27f0d289067c935543d282e5ac5a310b1 Mon Sep 17 00:00:00 2001 From: Niranjan Hasabnis Date: Tue, 25 Sep 2018 14:00:41 -0700 Subject: [PATCH 069/570] Adding check around AddFunctionLibrary --- tensorflow/core/kernels/partitioned_function_ops.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/partitioned_function_ops.cc b/tensorflow/core/kernels/partitioned_function_ops.cc index 7a5a2ff8fa..fdb4c84c46 100644 --- a/tensorflow/core/kernels/partitioned_function_ops.cc +++ b/tensorflow/core/kernels/partitioned_function_ops.cc @@ -102,7 +102,8 @@ class PartitionedCallOp : public AsyncOpKernel { // by name. auto graph = tensorflow::MakeUnique(fbody->graph->flib_def()); FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); - graph.get()->AddFunctionLibrary(global_flib.ToProto()); + TF_CHECK_OK( + graph.get()->AddFunctionLibrary(global_flib.ToProto())); CopyGraph(*fbody->graph, graph.get()); OP_REQUIRES_OK_ASYNC(ctx, PinResourceArgs(graph.get(), args), done); @@ -258,7 +259,8 @@ class PartitionedCallOp : public AsyncOpKernel { for (const auto& partition : partitions) { std::unique_ptr subgraph(new Graph(graph->flib_def())); FunctionLibraryDefinition global_flib(OpRegistry::Global(), {}); - subgraph.get()->AddFunctionLibrary(global_flib.ToProto()); + TF_CHECK_OK( + subgraph.get()->AddFunctionLibrary(global_flib.ToProto())); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = true; -- GitLab From 7630e9df4804a01f5dd0ab20d4c0bcfb58e45432 Mon Sep 17 00:00:00 2001 From: Richard Yu Date: Tue, 25 Sep 2018 15:50:13 -0700 Subject: [PATCH 070/570] Fixing error --- tensorflow/contrib/quantize/python/fold_batch_norms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py 
b/tensorflow/contrib/quantize/python/fold_batch_norms.py index d882b79892..d9f179bee4 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -628,7 +628,7 @@ def _GetBatchNormParams(graph, context, has_scaling): bn_decay_var_tensor = _FindMatchingTensor(graph, op_suffix_bn_decay_var, context) if batch_mean_tensor is None and moving_mean_tensor is None: - raise ValueError('Error folding unfused batch norms') + ValueError('Error folding unfused batch norms') if has_scaling: gamma_tensor = _FindMatchingTensor(graph, op_suffix_gamma, context) -- GitLab From f55e5ef27b3ccf1b75932e219f7358976dbf56c2 Mon Sep 17 00:00:00 2001 From: IMBurbank Date: Tue, 25 Sep 2018 18:39:11 -0600 Subject: [PATCH 071/570] Update to use python 2-3 compatible function tf_inspect.getfullargspec. --- .../python/losses/python/tuple_losses_impl.py | 2 +- .../labeled_tensor/python/ops/_typecheck.py | 2 +- .../layers/python/layers/rev_block_lib.py | 3 +- .../python/learn/estimators/estimator.py | 4 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/experiment_test.py | 2 +- .../learn/python/learn/export_strategy.py | 2 +- .../contrib/learn/python/learn/metric_spec.py | 2 +- .../contrib/learn/python/learn/monitors.py | 2 +- .../contrib/tpu/python/tpu/tpu_function.py | 2 +- tensorflow/python/framework/errors_impl.py | 2 +- tensorflow/python/framework/function.py | 6 +- tensorflow/python/keras/backend_test.py | 2 +- tensorflow/python/keras/testing_utils.py | 2 +- .../kernel_tests/variable_scope_test.py | 4 +- tensorflow/python/ops/variable_scope.py | 4 +- tensorflow/python/util/tf_contextlib_test.py | 2 +- tensorflow/python/util/tf_inspect.py | 7 +- tensorflow/python/util/tf_inspect_test.py | 249 +++++++++++++++++- .../api/lib/python_object_to_proto_visitor.py | 2 +- 20 files changed, 267 insertions(+), 36 deletions(-) diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py 
b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py index 221c70c38b..00a83e5e55 100644 --- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py @@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn): """ # Match arguments in `loss_fn` to elements of `namedtuple`. # TODO(joelshor): Properly handle `varargs` and `keywords`. - argspec = tf_inspect.getargspec(loss_fn) + argspec = tf_inspect.getfullargspec(loss_fn) defaults = argspec.defaults or [] required_args = set(argspec.args[:-len(defaults)]) diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py index 80fa17ec1f..0e23039847 100644 --- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py +++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py @@ -230,7 +230,7 @@ def accepts(*types): def check_accepts(f): """Check the types.""" - spec = tf_inspect.getargspec(f) + spec = tf_inspect.getfullargspec(f) num_function_arguments = len(spec.args) if len(types) != num_function_arguments: diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 06da32072f..55979cc391 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -576,7 +576,8 @@ def _recomputing_grad_fn(compute_fn, def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" - has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args + has_is_recompute_kwarg = ( + "is_recomputing" in tf_inspect.getfullargspec(fn).args) for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError("All inputs to function must be Tensors") diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py 
b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index c1de42782e..b88923bca2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -199,11 +199,11 @@ def _model_fn_args(fn): if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'): # Handle functools.partial and similar objects. return tuple([ - arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):] + arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):] if arg not in set(fn.keywords.keys()) ]) # Handle function. - return tuple(tf_inspect.getargspec(fn).args) + return tuple(tf_inspect.getfullargspec(fn).args) def _get_replica_device_setter(config): diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index c6f79e00d5..63dd08316b 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -1861,7 +1861,7 @@ def _get_arguments(func): _, func = tf_decorator.unwrap(func) if hasattr(func, "__code__"): # Regular function. - return tf_inspect.getargspec(func) + return tf_inspect.getfullargspec(func) elif hasattr(func, "func"): # Partial function. 
return _get_arguments(func.func) diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index fb16c94c29..6926696fb6 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -126,7 +126,7 @@ class TestBaseEstimator(object): def _check_method_supports_args(method, kwargs): """Checks that the given method supports the given args.""" - supported_args = tuple(tf_inspect.getargspec(method).args) + supported_args = tuple(tf_inspect.getfullargspec(method).args) for kwarg in kwargs: if kwarg not in supported_args: raise ValueError( diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 075cab536e..0d6e0cdc18 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -96,7 +96,7 @@ class ExportStrategy( """ # don't break existing export_fns that don't accept checkpoint_path and # eval_result - export_fn_args = tf_inspect.getargspec(self.export_fn).args + export_fn_args = tf_inspect.getfullargspec(self.export_fn).args kwargs = {} if 'checkpoint_path' in export_fn_args: kwargs['checkpoint_path'] = checkpoint_path diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 97220365d5..604d6d46b4 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -51,7 +51,7 @@ def _args(fn): return tuple( [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())]) # Handle function. 
- return tuple(tf_inspect.getargspec(fn).args) + return tuple(tf_inspect.getfullargspec(fn).args) _CANONICAL_LABELS_ARG = 'labels' diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 3d691d4340..5f61e0264f 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook): def end(self, session): self._last_step = None for m in self._monitors: - if "session" in tf_inspect.getargspec(m.end).args: + if "session" in tf_inspect.getfullargspec(m.end).args: m.end(session=session) else: m.end() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py index 0c7a38dbbb..9c4bd1c4d1 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py @@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue): number_of_arguments_needed = input_arity if infeed_queue is not None: number_of_arguments_needed += infeed_queue.number_of_tuple_elements - arg_spec = tf_inspect.getargspec(func) + arg_spec = tf_inspect.getfullargspec(func) number_of_args = len(arg_spec.args) if arg_spec.defaults is None: number_of_defaults = 0 diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index 5af71f2cfb..c373e75a74 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -55,7 +55,7 @@ class OpError(Exception): def __reduce__(self): # Allow the subclasses to accept less arguments in their __init__. 
- init_argspec = tf_inspect.getargspec(self.__class__.__init__) + init_argspec = tf_inspect.getfullargspec(self.__class__.__init__) args = tuple(getattr(self, arg) for arg in init_argspec.args[1:]) return self.__class__, args diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index f287289bd0..3db6f683c9 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -132,9 +132,9 @@ class Defun(object): raise ValueError("func %s must be callable" % func) # Func should not use kwargs and defaults. - argspec = tf_inspect.getargspec(func) - if argspec.keywords or argspec.defaults: - raise ValueError("Functions with argument defaults or keyword " + argspec = tf_inspect.getfullargspec(func) + if argspec.varkw or argspec.defaults: + raise ValueError("Functions with argument defaults or varkw " "arguments are not supported.") # Computes how many arguments 'func' has. diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index ab71589940..31191d0d35 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase): compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), keras_kwargs={'axis': -1}, np_kwargs={'axis': -1}) - if 'keepdims' in tf_inspect.getargspec(keras_op).args: + if 'keepdims' in tf_inspect.getfullargspec(keras_op).args: compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), keras_kwargs={'axis': 1, diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py index 501b50ba5f..1afaba5653 100644 --- a/tensorflow/python/keras/testing_utils.py +++ b/tensorflow/python/keras/testing_utils.py @@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, layer.set_weights(weights) # test and instantiation from weights - if 'weights' in 
tf_inspect.getargspec(layer_cls.__init__): + if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__): kwargs['weights'] = weights layer = layer_cls(**kwargs) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 401e1ae102..1d0b72b17a 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase): def testSignatureGetVarVsGetLocalVar(self): """get_{local,}variable() must take the same list of args.""" - arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0] - local_arg_names = tf_inspect.getargspec( + arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0] + local_arg_names = tf_inspect.getfullargspec( variable_scope.get_local_variable)[0] self.assertEqual(arg_names, local_arg_names) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index a43676cd70..3cc1eb916d 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -892,14 +892,14 @@ class _VariableStore(object): if shape and shape.is_fully_defined(): init_val = lambda: initializer( # pylint: disable=g-long-lambda shape.as_list(), dtype=dtype, partition_info=partition_info) - elif not tf_inspect.getargspec(initializer).args: + elif not tf_inspect.getfullargspec(initializer).args: init_val = initializer else: raise ValueError("You can only pass an initializer function that " "expects no arguments to its callable when the " "shape is not fully defined. The given initializer " "function expects the following args %s" % - tf_inspect.getargspec(initializer).args) + tf_inspect.getfullargspec(initializer).args) variable_dtype = dtype.base_dtype # Create the variable. 
diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py index 4a5bf388a6..1e921b5ea3 100644 --- a/tensorflow/python/util/tf_contextlib_test.py +++ b/tensorflow/python/util/tf_contextlib_test.py @@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase): self.assertFalse(isinstance(target, tf_decorator.TFDecorator)) def testGetArgSpecReturnsWrappedArgSpec(self): - argspec = tf_inspect.getargspec(test_params_and_defaults) + argspec = tf_inspect.getfullargspec(test_params_and_defaults) self.assertEqual(['a', 'b', 'c', 'd'], argspec.args) self.assertEqual((2, True, 'hello'), argspec.defaults) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 967c872c2a..234850ac3f 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -43,7 +43,12 @@ def currentframe(): def getargspec(obj): - """TFDecorator-aware replacement for inspect.getargspec. + """TFDecorator-aware replacement for `inspect.getargspec`. + + This should not be called from other modules. It is deprecated in python3. + + Use `getfullargspec`. It is a TFDecorator-aware replacement for + `inspect.getfullargspec` compatible with both python2 and python3. 
Args: obj: A function, partial function, or callable object, possibly diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index d3b7e4b969..55f88f8fc6 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -122,18 +122,6 @@ class TfInspectTest(test.TestCase): self.assertEqual(argspec, tf_inspect.getargspec(partial_func)) - def testGetFullArgsSpecForPartial(self): - - def func(a, b): - del a, b - - partial_function = functools.partial(func, 1) - argspec = tf_inspect.FullArgSpec( - args=['b'], varargs=None, varkw=None, defaults=None, - kwonlyargs=[], kwonlydefaults=None, annotations={}) - - self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function)) - def testGetArgSpecOnPartialInvalidArgspec(self): """Tests getargspec on partial function that doesn't have valid argspec.""" @@ -303,6 +291,243 @@ class TfInspectTest(test.TestCase): self.assertEqual(argspec, tf_inspect.getargspec(NewClass)) + def testGetFullArgSpecOnDecoratorsThatDontProvideFullArgSpec(self): + argspec = tf_inspect.getfullargspec( + test_decorated_function_with_defaults) + self.assertEqual(['a', 'b', 'c'], argspec.args) + self.assertEqual((2, 'Hello'), argspec.defaults) + + def testGetFullArgSpecOnDecoratorThatChangesFullArgSpec(self): + argspec = tf_inspect.FullArgSpec( + args=['a', 'b', 'c'], + varargs=None, + varkw=None, + defaults=(1, 'hello'), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + + decorator = tf_decorator.TFDecorator('', test_undecorated_function, '', + argspec) + self.assertEqual(argspec, tf_inspect.getfullargspec(decorator)) + + def testGetFullArgSpecIgnoresDecoratorsThatDontProvideFullArgSpec(self): + argspec = tf_inspect.FullArgSpec( + args=['a', 'b', 'c'], + varargs=None, + varkw=None, + defaults=(1, 'hello'), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + + inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function, + '', argspec) + 
outer_decorator = tf_decorator.TFDecorator('', inner_decorator) + self.assertEqual(argspec, tf_inspect.getfullargspec(outer_decorator)) + + def testGetFullArgSpecReturnsOutermostDecoratorThatChangesFullArgSpec(self): + outer_argspec = tf_inspect.FullArgSpec( + args=['a'], varargs=None, varkw=None, defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + inner_argspec = tf_inspect.FullArgSpec( + args=['b'], varargs=None, varkw=None, defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + inner_decorator = tf_decorator.TFDecorator('', test_undecorated_function, + '', inner_argspec) + outer_decorator = tf_decorator.TFDecorator('', inner_decorator, '', + outer_argspec) + self.assertEqual(outer_argspec, + tf_inspect.getfullargspec(outer_decorator)) + + def testGetFullArgsSpecForPartial(self): + + def func(a, b): + del a, b + + partial_function = functools.partial(func, 1) + argspec = tf_inspect.FullArgSpec( + args=['b'], varargs=None, varkw=None, defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function)) + + def testGetFullArgSpecOnPartialInvalidFullArgSpec(self): + """Tests getfullargspec. + + Tests on partial function that doesn't have valid fullargspec. + """ + + def func(m, n, l, k=4): + return 2 * m + l + n * k + + partial_func = functools.partial(func, n=7) + + exception_message = (r"Some arguments \['l'\] do not have default value, " + "but they are positioned after those with default " + "values. 
This can not be expressed with ArgSpec.") + with self.assertRaisesRegexp(ValueError, exception_message): + tf_inspect.getfullargspec(partial_func) + + def testGetFullArgSpecOnPartialValidFullArgSpec(self): + """Tests getfullargspec on partial function with valid fullargspec.""" + + def func(m, n, l, k=4): + return 2 * m + l + n * k + + partial_func = functools.partial(func, n=7, l=2) + argspec = tf_inspect.FullArgSpec( + args=['m', 'n', 'l', 'k'], + varargs=None, + varkw=None, + defaults=(7, 2, 4), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialNoArgumentsLeft(self): + """Tests getfullargspec on partial function that prunes all arguments.""" + + def func(m, n): + return 2 * m + n + + partial_func = functools.partial(func, 7, 10) + argspec = tf_inspect.FullArgSpec( + args=[], varargs=None, varkw=None, defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialKeywordArgument(self): + """Tests getfullargspec on partial function that prunes some arguments.""" + + def func(m, n): + return 2 * m + n + + partial_func = functools.partial(func, n=7) + argspec = tf_inspect.FullArgSpec( + args=['m', 'n'], varargs=None, varkw=None, defaults=(7,), + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self): + """Tests getfullargspec. + + Tests on partial function that prunes argument by keyword. 
+ """ + + def func(m=1, n=2): + return 2 * m + n + + partial_func = functools.partial(func, n=7) + argspec = tf_inspect.FullArgSpec( + args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7), + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialWithVarargs(self): + """Tests getfullargspec on partial function with variable arguments.""" + + def func(m, *arg): + return m + len(arg) + + partial_func = functools.partial(func, 7, 8) + argspec = tf_inspect.FullArgSpec( + args=[], varargs='arg', varkw=None, defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialWithVarkwargs(self): + """Tests getfullargspec. + + Tests on partial function with variable keyword arguments. + """ + + def func(m, n, **kwarg): + return m * n + len(kwarg) + + partial_func = functools.partial(func, 7) + argspec = tf_inspect.FullArgSpec( + args=['n'], varargs=None, varkw='kwarg', defaults=None, + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnPartialWithDecorator(self): + """Tests getfullargspec on decorated partial function.""" + + @test_decorator('decorator') + def func(m=1, n=2): + return 2 * m + n + + partial_func = functools.partial(func, n=7) + argspec = tf_inspect.FullArgSpec( + args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7), + kwonlyargs=[], kwonlydefaults=None, annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) + + def testGetFullArgSpecOnCallableObject(self): + + class Callable(object): + + def __call__(self, a, b=1, c='hello'): + pass + + argspec = tf_inspect.FullArgSpec( + args=['self', 'a', 'b', 'c'], + varargs=None, + varkw=None, + defaults=(1, 'hello'), + kwonlyargs=[], + kwonlydefaults=None, + 
annotations={}) + + test_obj = Callable() + self.assertEqual(argspec, tf_inspect.getfullargspec(test_obj)) + + def testGetFullArgSpecOnInitClass(self): + + class InitClass(object): + + def __init__(self, a, b=1, c='hello'): + pass + + argspec = tf_inspect.FullArgSpec( + args=['self', 'a', 'b', 'c'], + varargs=None, + varkw=None, + defaults=(1, 'hello'), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(InitClass)) + + def testGetFullArgSpecOnNewClass(self): + + class NewClass(object): + + def __new__(cls, a, b=1, c='hello'): + pass + + argspec = tf_inspect.FullArgSpec( + args=['cls', 'a', 'b', 'c'], + varargs=None, + varkw=None, + defaults=(1, 'hello'), + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + + self.assertEqual(argspec, tf_inspect.getfullargspec(NewClass)) + def testGetDoc(self): self.assertEqual('Test Decorated Function With Defaults Docstring.', tf_inspect.getdoc(test_decorated_function_with_defaults)) diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py index 3a48cf683c..2a40caf720 100644 --- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py +++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py @@ -47,7 +47,7 @@ def _SanitizedArgSpec(obj): string, a string representation of the argspec. 
""" output_string = '' - unsanitized_arg_spec = tf_inspect.getargspec(obj) + unsanitized_arg_spec = tf_inspect.getfullargspec(obj) for clean_attr in ('args', 'varargs', 'keywords'): output_string += '%s=%s, ' % (clean_attr, -- GitLab From 7c2341501a583ca625c976f118090e495cdcbe07 Mon Sep 17 00:00:00 2001 From: Jason Furmanek Date: Wed, 26 Sep 2018 04:44:12 +0000 Subject: [PATCH 072/570] Find NCCL2 debians in Tensorflow configure --- configure.py | 136 +++++++++++++++++++--------- third_party/nccl/nccl_configure.bzl | 14 ++- third_party/nccl/system.BUILD.tpl | 4 +- 3 files changed, 105 insertions(+), 49 deletions(-) diff --git a/configure.py b/configure.py index f0b9fada5e..9fd2dc2630 100644 --- a/configure.py +++ b/configure.py @@ -54,6 +54,12 @@ _TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' _TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) _TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') +NCCL_LIB_PATHS = [ + "lib64/", + "lib/powerpc64le-linux-gnu/", + "lib/x86_64-linux-gnu/", + "" +] class UserInputError(Exception): pass @@ -1085,7 +1091,7 @@ def set_tf_tensorrt_install_path(environ_cp): def set_tf_nccl_install_path(environ_cp): - """Set NCCL_INSTALL_PATH and TF_NCCL_VERSION. + """Set NCCL_INSTALL_PATH, NCCL_HDR_PATH and TF_NCCL_VERSION. Args: environ_cp: copy of the os.environ. @@ -1111,46 +1117,98 @@ def set_tf_nccl_install_path(environ_cp): if tf_nccl_version == '1': break # No need to get install path, NCCL 1 is a GitHub repo. - # TODO(csigg): Look with ldconfig first if we can find the library in paths + # Look with ldconfig first if we can find the library in paths # like /usr/lib/x86_64-linux-gnu and the header file in the corresponding # include directory. This is where the NCCL .deb packages install them. - # Then ask the user if we should use that. 
Instead of a single - # NCCL_INSTALL_PATH, pass separate NCCL_LIB_PATH and NCCL_HDR_PATH to - # nccl_configure.bzl - default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') - ask_nccl_path = (r'Please specify the location where NCCL %s library is ' + + # First check to see if NCCL is in the ldconfig. + # If its found, use that location. + if is_linux(): + ldconfig_bin = which('ldconfig') or '/sbin/ldconfig' + nccl2_path_from_ldconfig = run_shell([ldconfig_bin, '-p']) + nccl2_path_from_ldconfig = re.search('.*libnccl.so .* => (.*)', + nccl2_path_from_ldconfig) + if nccl2_path_from_ldconfig: + nccl2_path_from_ldconfig = nccl2_path_from_ldconfig.group(1) + if os.path.exists('%s.%s' % (nccl2_path_from_ldconfig, tf_nccl_version)): + nccl_install_path = os.path.dirname(nccl2_path_from_ldconfig) + print('NCCL libraries found in ' + nccl2_path_from_ldconfig) + + # Check if this is the main system lib location + if re.search('.*linux-gnu', nccl_install_path): + trunc_nccl_install_path = "/usr" + print("This looks like a system path.") + else: + trunc_nccl_install_path = nccl_install_path + "/.." + + # Look for header + nccl_hdr_path = trunc_nccl_install_path + "/include" + print("Assuming NCCL header path is " + nccl_hdr_path) + if os.path.exists(nccl_hdr_path + "/nccl.h"): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = nccl_hdr_path + write_action_env_to_bazelrc('NCCL_HDR_PATH', nccl_hdr_path) + break + else: + print('The header for NCCL2 cannot be found. Please install the libnccl-dev package.') + else: + print('NCCL2 is listed by ldconfig but the library is not found. ' + 'Your ldconfig is out of date. Please run sudo ldconfig.') + else: + # NCCL is not found in ldconfig. Ask the user for the location. 
+ default_nccl_path = environ_cp.get('CUDA_TOOLKIT_PATH') + ask_nccl_path = (r'Please specify the location where NCCL %s library is ' 'installed. Refer to README.md for more details. [Default ' 'is %s]:') % (tf_nccl_version, default_nccl_path) - nccl_install_path = get_from_env_or_user_or_default( + nccl_install_path = get_from_env_or_user_or_default( environ_cp, 'NCCL_INSTALL_PATH', ask_nccl_path, default_nccl_path) - # Result returned from "read" will be used unexpanded. That make "~" - # unusable. Going through one more level of expansion to handle that. - nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) - if is_windows() or is_cygwin(): - nccl_install_path = cygpath(nccl_install_path) - - if is_windows(): - nccl_lib_path = 'lib/x64/nccl.lib' - elif is_linux(): - nccl_lib_path = 'lib/libnccl.so.%s' % tf_nccl_version - elif is_macos(): - nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version - - nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) - nccl_hdr_path = os.path.join(nccl_install_path, 'include/nccl.h') - if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): - # Set NCCL_INSTALL_PATH - environ_cp['NCCL_INSTALL_PATH'] = nccl_install_path - write_action_env_to_bazelrc('NCCL_INSTALL_PATH', nccl_install_path) - break - - # Reset and Retry - print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' - 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, + # Result returned from "read" will be used unexpanded. That make "~" + # unusable. Going through one more level of expansion to handle that. 
+ nccl_install_path = os.path.realpath(os.path.expanduser(nccl_install_path)) + if is_windows() or is_cygwin(): + nccl_install_path = cygpath(nccl_install_path) + + if is_windows(): + nccl_lib_path = 'lib/x64/nccl.lib' + elif is_linux(): + nccl_lib_filename = 'libnccl.so.%s' % tf_nccl_version + nccl_lpath = '%s/lib/%s' % (nccl_install_path, nccl_lib_filename) + if not os.path.exists(nccl_lpath): + for relative_path in NCCL_LIB_PATHS: + path = '%s/%s%s' % (nccl_install_path, relative_path, nccl_lib_filename) + if os.path.exists(path): + print("NCCL found at " + path) + nccl_lib_path = path + break + else: + nccl_lib_path = nccl_lpath + elif is_macos(): + nccl_lib_path = 'lib/libnccl.%s.dylib' % tf_nccl_version + + nccl_lib_path = os.path.join(nccl_install_path, nccl_lib_path) + nccl_hdr_path = os.path.join(os.path.dirname(nccl_lib_path), '../include/nccl.h') + print("Assuming NCCL header path is "+nccl_hdr_path) + if os.path.exists(nccl_lib_path) and os.path.exists(nccl_hdr_path): + # Set NCCL_INSTALL_PATH + environ_cp['NCCL_INSTALL_PATH'] = os.path.dirname(nccl_lib_path) + write_action_env_to_bazelrc('NCCL_INSTALL_PATH', os.path.dirname(nccl_lib_path)) + + # Set NCCL_HDR_PATH + environ_cp['NCCL_HDR_PATH'] = os.path.dirname(nccl_hdr_path) + write_action_env_to_bazelrc('NCCL_HDR_PATH', os.path.dirname(nccl_hdr_path)) + break + + # Reset and Retry + print('Invalid path to NCCL %s toolkit, %s or %s not found. Please use the ' + 'O/S agnostic package of NCCL 2' % (tf_nccl_version, nccl_lib_path, nccl_hdr_path)) - environ_cp['TF_NCCL_VERSION'] = '' + environ_cp['TF_NCCL_VERSION'] = '' else: raise UserInputError('Invalid TF_NCCL setting was provided %d ' 'times in a row. Assuming to be a scripting mistake.' 
% @@ -1401,20 +1459,10 @@ def set_grpc_build_flags(): def set_system_libs_flag(environ_cp): syslibs = environ_cp.get('TF_SYSTEM_LIBS', '') + syslibs = ','.join(sorted(syslibs.split(','))) if syslibs and syslibs != '': - if ',' in syslibs: - syslibs = ','.join(sorted(syslibs.split(','))) - else: - syslibs = ','.join(sorted(syslibs.split())) write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs) - if 'PREFIX' in environ_cp: - write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX']) - if 'LIBDIR' in environ_cp: - write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR']) - if 'INCLUDEDIR' in environ_cp: - write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) - def set_windows_build_flags(environ_cp): """Set Windows specific build options.""" diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index ce9447096e..0713b36724 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -5,6 +5,7 @@ * `TF_NCCL_VERSION`: The NCCL version. * `NCCL_INSTALL_PATH`: The installation path of the NCCL library. + * `NCCL_HDR_PATH`: The installation path of the NCCL header files. """ load( @@ -15,6 +16,7 @@ load( ) _NCCL_INSTALL_PATH = "NCCL_INSTALL_PATH" +_NCCL_HDR_PATH = "NCCL_HDR_PATH" _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NCCL_CONFIG_REPO = "TF_NCCL_CONFIG_REPO" @@ -68,7 +70,7 @@ def _find_nccl_header(repository_ctx, nccl_install_path): return header_path -def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version): +def _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version): """Checks whether the header file matches the specified version of NCCL. Args: @@ -79,7 +81,9 @@ def _check_nccl_version(repository_ctx, nccl_install_path, nccl_version): Returns: A string containing the library version of NCCL. 
""" - header_path = _find_nccl_header(repository_ctx, nccl_install_path) + header_path = repository_ctx.path("%s/nccl.h" % nccl_hdr_path) + if not header_path.exists: + header_path = _find_nccl_header(repository_ctx, nccl_install_path) header_dir = str(header_path.realpath.dirname) major_version = find_cuda_define(repository_ctx, header_dir, "nccl.h", _DEFINE_NCCL_MAJOR) @@ -109,6 +113,7 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version): """ lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path, nccl_version)) + if not lib_path.exists: auto_configure_fail("Cannot find NCCL library %s" % str(lib_path)) return lib_path @@ -138,10 +143,12 @@ def _nccl_configure_impl(repository_ctx): else: # Create target for locally installed NCCL. nccl_install_path = repository_ctx.os.environ[_NCCL_INSTALL_PATH].strip() - _check_nccl_version(repository_ctx, nccl_install_path, nccl_version) + nccl_hdr_path = repository_ctx.os.environ[_NCCL_HDR_PATH].strip() + _check_nccl_version(repository_ctx, nccl_install_path, nccl_hdr_path, nccl_version) repository_ctx.template("BUILD", _NCCL_LOCAL_BUILD_TEMPLATE, { "%{version}": nccl_version, "%{install_path}": nccl_install_path, + "%{hdr_path}": nccl_hdr_path, }) @@ -149,6 +156,7 @@ nccl_configure = repository_rule( implementation=_nccl_configure_impl, environ=[ _NCCL_INSTALL_PATH, + _NCCL_HDR_PATH, _TF_NCCL_VERSION, ], ) diff --git a/third_party/nccl/system.BUILD.tpl b/third_party/nccl/system.BUILD.tpl index 7ca835dedf..a07f54955f 100644 --- a/third_party/nccl/system.BUILD.tpl +++ b/third_party/nccl/system.BUILD.tpl @@ -20,7 +20,7 @@ genrule( "libnccl.so.%{version}", "nccl.h", ], - cmd = """cp "%{install_path}/include/nccl.h" "$(@D)/nccl.h" && - cp "%{install_path}/lib/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """, + cmd = """cp "%{hdr_path}/nccl.h" "$(@D)/nccl.h" && + cp "%{install_path}/libnccl.so.%{version}" "$(@D)/libnccl.so.%{version}" """, ) -- GitLab From 
96eec07af06f4dfc75cee57b74ba4b5347619634 Mon Sep 17 00:00:00 2001 From: Cao Zongyan Date: Wed, 26 Sep 2018 13:04:46 +0800 Subject: [PATCH 073/570] Re-add compat module for leaky_relu implementation. --- tensorflow/python/ops/nn_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 3f64f0af9a..78e000e458 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -22,6 +22,7 @@ import numbers import numpy as np +from tensorflow.python.compat import compat from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import graph_util -- GitLab From d59678448469ca134875e062f7f8d6d77942af4e Mon Sep 17 00:00:00 2001 From: Jason Furmanek Date: Wed, 26 Sep 2018 05:19:10 +0000 Subject: [PATCH 074/570] fix unintential removal of set_system_libs_flag --- configure.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 9fd2dc2630..3791ead3ed 100644 --- a/configure.py +++ b/configure.py @@ -1459,10 +1459,20 @@ def set_grpc_build_flags(): def set_system_libs_flag(environ_cp): syslibs = environ_cp.get('TF_SYSTEM_LIBS', '') - syslibs = ','.join(sorted(syslibs.split(','))) if syslibs and syslibs != '': + if ',' in syslibs: + syslibs = ','.join(sorted(syslibs.split(','))) + else: + syslibs = ','.join(sorted(syslibs.split())) write_action_env_to_bazelrc('TF_SYSTEM_LIBS', syslibs) + if 'PREFIX' in environ_cp: + write_to_bazelrc('build --define=PREFIX=%s' % environ_cp['PREFIX']) + if 'LIBDIR' in environ_cp: + write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR']) + if 'INCLUDEDIR' in environ_cp: +write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) + def set_windows_build_flags(environ_cp): """Set Windows specific build options.""" -- GitLab From 1668d28ca3558f3bc4fcf94752799712211f219e Mon Sep 17 00:00:00 2001 From: Jason 
Furmanek Date: Wed, 26 Sep 2018 05:22:04 +0000 Subject: [PATCH 075/570] fix in last line of set_system_lib_flag --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 3791ead3ed..b1ab55b657 100644 --- a/configure.py +++ b/configure.py @@ -1471,7 +1471,7 @@ def set_system_libs_flag(environ_cp): if 'LIBDIR' in environ_cp: write_to_bazelrc('build --define=LIBDIR=%s' % environ_cp['LIBDIR']) if 'INCLUDEDIR' in environ_cp: -write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) + write_to_bazelrc('build --define=INCLUDEDIR=%s' % environ_cp['INCLUDEDIR']) def set_windows_build_flags(environ_cp): -- GitLab From bd2524f16f3722cce2360ec5f7122c6b6f1ead49 Mon Sep 17 00:00:00 2001 From: Koan-Sin Tan Date: Wed, 26 Sep 2018 13:23:14 +0800 Subject: [PATCH 076/570] fix unbalanced delimiter in benchmark_model doc as reported in https://github.com/tensorflow/tensorflow/issues/22499, there is unbalanced delimiter `"` --- tensorflow/tools/benchmark/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/benchmark/README.md b/tensorflow/tools/benchmark/README.md index e64af2bfe1..dee1a20f3f 100644 --- a/tensorflow/tools/benchmark/README.md +++ b/tensorflow/tools/benchmark/README.md @@ -32,7 +32,7 @@ adb push bazel-bin/tensorflow/tools/benchmark/benchmark_model /data/local/tmp (4) Run the benchmark. 
For example: ``` -adb shell "/data/local/tmp/benchmark_model \ +adb shell /data/local/tmp/benchmark_model \ --graph=/data/local/tmp/tensorflow_inception_graph.pb \ --input_layer="input:0" \ --input_layer_shape="1,224,224,3" \ -- GitLab From 09bf8eb99cd76c506dcd2a0e8c8e893f7f3916b1 Mon Sep 17 00:00:00 2001 From: Jason Furmanek Date: Wed, 26 Sep 2018 05:26:54 +0000 Subject: [PATCH 077/570] white space removal --- third_party/nccl/nccl_configure.bzl | 1 - 1 file changed, 1 deletion(-) diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 0713b36724..d78fe8f3aa 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -113,7 +113,6 @@ def _find_nccl_lib(repository_ctx, nccl_install_path, nccl_version): """ lib_path = repository_ctx.path("%s/lib/libnccl.so.%s" % (nccl_install_path, nccl_version)) - if not lib_path.exists: auto_configure_fail("Cannot find NCCL library %s" % str(lib_path)) return lib_path -- GitLab From fa76895ad577246a8ab241e668765cad651558fb Mon Sep 17 00:00:00 2001 From: Isaac Burbank Date: Wed, 26 Sep 2018 11:20:44 -0600 Subject: [PATCH 078/570] Update python_object_to_proto_visitor.py Changed test key for FullArgSpec to check for `varkw`, replacing the old ArgSpec key `keywords` --- tensorflow/tools/api/lib/python_object_to_proto_visitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py index 2a40caf720..a8e69fda4f 100644 --- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py +++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py @@ -49,7 +49,7 @@ def _SanitizedArgSpec(obj): output_string = '' unsanitized_arg_spec = tf_inspect.getfullargspec(obj) - for clean_attr in ('args', 'varargs', 'keywords'): + for clean_attr in ('args', 'varargs', 'varkw'): output_string += '%s=%s, ' % (clean_attr, getattr(unsanitized_arg_spec, 
clean_attr)) -- GitLab From 5bbcdb8a58efd97b0f73927218d5896da67f5203 Mon Sep 17 00:00:00 2001 From: Isaac Burbank Date: Wed, 26 Sep 2018 11:34:38 -0600 Subject: [PATCH 079/570] Update tf_inspect_test.py Remove subsection of added tests that were problematic. --- tensorflow/python/util/tf_inspect_test.py | 78 ----------------------- 1 file changed, 78 deletions(-) diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 55f88f8fc6..ba9430c756 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -353,41 +353,6 @@ class TfInspectTest(test.TestCase): self.assertEqual(argspec, tf_inspect.getfullargspec(partial_function)) - def testGetFullArgSpecOnPartialInvalidFullArgSpec(self): - """Tests getfullargspec. - - Tests on partial function that doesn't have valid fullargspec. - """ - - def func(m, n, l, k=4): - return 2 * m + l + n * k - - partial_func = functools.partial(func, n=7) - - exception_message = (r"Some arguments \['l'\] do not have default value, " - "but they are positioned after those with default " - "values. 
This can not be expressed with ArgSpec.") - with self.assertRaisesRegexp(ValueError, exception_message): - tf_inspect.getfullargspec(partial_func) - - def testGetFullArgSpecOnPartialValidFullArgSpec(self): - """Tests getfullargspec on partial function with valid fullargspec.""" - - def func(m, n, l, k=4): - return 2 * m + l + n * k - - partial_func = functools.partial(func, n=7, l=2) - argspec = tf_inspect.FullArgSpec( - args=['m', 'n', 'l', 'k'], - varargs=None, - varkw=None, - defaults=(7, 2, 4), - kwonlyargs=[], - kwonlydefaults=None, - annotations={}) - - self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - def testGetFullArgSpecOnPartialNoArgumentsLeft(self): """Tests getfullargspec on partial function that prunes all arguments.""" @@ -401,35 +366,6 @@ class TfInspectTest(test.TestCase): self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - def testGetFullArgSpecOnPartialKeywordArgument(self): - """Tests getfullargspec on partial function that prunes some arguments.""" - - def func(m, n): - return 2 * m + n - - partial_func = functools.partial(func, n=7) - argspec = tf_inspect.FullArgSpec( - args=['m', 'n'], varargs=None, varkw=None, defaults=(7,), - kwonlyargs=[], kwonlydefaults=None, annotations={}) - - self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - - def testGetFullArgSpecOnPartialKeywordArgumentWithDefaultValue(self): - """Tests getfullargspec. - - Tests on partial function that prunes argument by keyword. 
- """ - - def func(m=1, n=2): - return 2 * m + n - - partial_func = functools.partial(func, n=7) - argspec = tf_inspect.FullArgSpec( - args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7), - kwonlyargs=[], kwonlydefaults=None, annotations={}) - - self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - def testGetFullArgSpecOnPartialWithVarargs(self): """Tests getfullargspec on partial function with variable arguments.""" @@ -459,20 +395,6 @@ class TfInspectTest(test.TestCase): self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - def testGetFullArgSpecOnPartialWithDecorator(self): - """Tests getfullargspec on decorated partial function.""" - - @test_decorator('decorator') - def func(m=1, n=2): - return 2 * m + n - - partial_func = functools.partial(func, n=7) - argspec = tf_inspect.FullArgSpec( - args=['m', 'n'], varargs=None, varkw=None, defaults=(1, 7), - kwonlyargs=[], kwonlydefaults=None, annotations={}) - - self.assertEqual(argspec, tf_inspect.getfullargspec(partial_func)) - def testGetFullArgSpecOnCallableObject(self): class Callable(object): -- GitLab From 941b4e0f226de76f083401842e73bd9efd6db2d0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 27 Sep 2018 10:23:59 -0700 Subject: [PATCH 080/570] Fix support for custom optimizers in explicit schedule PiperOrigin-RevId: 214794973 --- .../grappler/optimizers/meta_optimizer.cc | 25 ++++++++++++++-- .../core/grappler/optimizers/meta_optimizer.h | 4 +++ .../optimizers/meta_optimizer_test.cc | 30 +++++++++++++++++++ .../core/protobuf/rewriter_config.proto | 4 +-- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index c59645e5f2..e18a5f21d2 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -172,11 +172,12 @@ Status MetaOptimizer::InitializeOptimizers( optimizers->push_back(MakeUnique( cfg_.scoped_allocator_optimization(), cfg_.scoped_allocator_opts())); } - return InitializeCustomGraphOptimizers(optimizers); + return InitializeCustomGraphOptimizers(std::set(), optimizers); } Status MetaOptimizer::InitializeOptimizersByName( std::vector>* optimizers) const { + std::set initialized_custom_optimizers; for (const string& optimizer_name : cfg_.optimizers()) { auto optimizer = MakeNewOptimizer(optimizer_name); if (optimizer) { @@ -190,18 +191,26 @@ Status MetaOptimizer::InitializeOptimizersByName( if (custom_optimizer) { VLOG(2) << "Registered custom graph optimizer: " << optimizer_name; - TF_RETURN_IF_ERROR(custom_optimizer->Init()); + TF_RETURN_IF_ERROR(custom_optimizer->Init( + GetCustomGraphOptimizerConfig(optimizer_name))); optimizers->push_back(std::move(custom_optimizer)); + initialized_custom_optimizers.insert(optimizer_name); } else { VLOG(2) << "Can't register an optimizer by name: " << optimizer_name; } } - return InitializeCustomGraphOptimizers(optimizers); + return InitializeCustomGraphOptimizers(initialized_custom_optimizers, + optimizers); } Status MetaOptimizer::InitializeCustomGraphOptimizers( + const std::set& 
pre_initialized_optimizers, std::vector>* optimizers) const { for (const auto& optimizer_config : cfg_.custom_optimizers()) { + if (pre_initialized_optimizers.find(optimizer_config.name()) != + pre_initialized_optimizers.end()) { + continue; + } // Initialize the ExperimentalImplementationSelector here instead of // CustomizeOptimizer registry, due the static link issue in TensorRT for // double registry. @@ -237,6 +246,16 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers( return Status::OK(); } +const RewriterConfig::CustomGraphOptimizer* +MetaOptimizer::GetCustomGraphOptimizerConfig(const string& name) const { + for (const auto& config : cfg_.custom_optimizers()) { + if (config.name() == name) { + return &config; + } + } + return nullptr; +} + Status MetaOptimizer::OptimizeGraph(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { int min_graph_nodes = cfg_.min_graph_nodes() == 0 ? kDefaultMinGraphNodes diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.h b/tensorflow/core/grappler/optimizers/meta_optimizer.h index 831c5e37c0..99a0a33ffa 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.h +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.h @@ -54,7 +54,11 @@ class MetaOptimizer : public GraphOptimizer { std::vector>* optimizers) const; // Initialize active optimizers from RewriterConfig.custom_optimizers. Status InitializeCustomGraphOptimizers( + const std::set& pre_initialized_optimizers, std::vector>* optimizers) const; + // Returns the config for a custom graph optimizer. Null if none was found. + const RewriterConfig::CustomGraphOptimizer* GetCustomGraphOptimizerConfig( + const string& name) const; // Run optimization pass over a single GrapplerItem. 
Meta optimizer might run // multiple such passes: 1) for the main graph 2) for the function library diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc index e74e0f7501..c477c4d4b1 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc @@ -71,6 +71,17 @@ class TestGraphOptimizer : public TestOptimizer { REGISTER_GRAPH_OPTIMIZER(TestGraphOptimizer); +class TestOptimizerWithParams : public TestOptimizer { + public: + Status Init( + const tensorflow::RewriterConfig_CustomGraphOptimizer* config) override { + CHECK(config != nullptr); + return Status::OK(); + } +}; + +REGISTER_GRAPH_OPTIMIZER(TestOptimizerWithParams); + class MetaOptimizerTest : public GrapplerTest {}; TEST_F(MetaOptimizerTest, RunsCustomOptimizer) { @@ -90,6 +101,25 @@ TEST_F(MetaOptimizerTest, RunsCustomOptimizer) { EXPECT_TRUE(TestOptimizer::IsOptimized()); } +TEST_F(MetaOptimizerTest, RunsCustomOptimizerWithParams) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + TestOptimizer::SetOptimized(false); + RewriterConfig rewriter_config; + rewriter_config.add_optimizers("TestOptimizerWithParams"); + auto* custom_config = rewriter_config.add_custom_optimizers(); + custom_config->set_name("TestOptimizerWithParams"); + (*custom_config->mutable_parameter_map())["foo"] = AttrValue(); + + MetaOptimizer optimizer(nullptr, rewriter_config); + GraphDef output; + const Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + EXPECT_TRUE(TestOptimizer::IsOptimized()); +} + TEST_F(MetaOptimizerTest, RunsCustomOptimizerAndCustomGraphOptimizer) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); GrapplerItem item; diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 
bb8f88336d..482178a540 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -143,8 +143,8 @@ message RewriterConfig { // not configurable (in contrast to memory optimization passes through the // meta-optimizer) and act only on manual op annotations. // - // Custom registered optimizers will be run after the base optimizers, in - // the order that they are specified. + // Custom optimizers (see custom_optimizers) that are not part of this + // schedule will be run after - in the order that they were specified. repeated string optimizers = 100; // Message to describe custom graph optimizer and its parameters -- GitLab From 3002b10e29363854c6fc20d788bc65233fd5116f Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Thu, 27 Sep 2018 10:25:58 -0700 Subject: [PATCH 081/570] Update L2HMC graph benchmark to be more similar to eager benchmark. PiperOrigin-RevId: 214795331 --- .../eager/python/examples/l2hmc/l2hmc_test.py | 162 ++++++++++-------- 1 file changed, 91 insertions(+), 71 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py index c38a1597b8..1c925e455b 100644 --- a/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py +++ b/tensorflow/contrib/eager/python/examples/l2hmc/l2hmc_test.py @@ -45,6 +45,17 @@ def step(dynamics, optimizer, samples): return loss, samples +# To be defunnable, the function cannot return an Operation, so the above +# function is used for defun or eager, and this function is used in graph to be +# able to run the gradient updates. 
+def graph_step(dynamics, optimizer, samples): + loss, grads, samples, _ = l2hmc.loss_and_grads( + dynamics, samples, loss_fn=l2hmc.compute_loss) + train_op = optimizer.apply_gradients(zip(grads, dynamics.variables)) + + return train_op, loss, samples + + def warmup(dynamics, optimizer, n_iters=1, @@ -134,51 +145,48 @@ class L2hmcBenchmark(tf.test.Benchmark): """Benchmark Graph performance.""" hparams = get_default_hparams() - tf.reset_default_graph() - with tf.Graph().as_default(): - energy_fn, _, _ = l2hmc.get_scg_energy_fn() - dynamics = l2hmc.Dynamics( - x_dim=hparams.x_dim, - minus_loglikelihood_fn=energy_fn, - n_steps=hparams.n_steps, - eps=hparams.eps) - x = tf.placeholder(tf.float32, shape=[None, hparams.x_dim]) - loss, x_out, _ = l2hmc.compute_loss(dynamics, x) - - global_step = tf.Variable(0., name="global_step", trainable=False) - learning_rate = tf.train.exponential_decay( - hparams.learning_rate, global_step, 1000, 0.96, staircase=True) - optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) - train_op = optimizer.minimize(loss, global_step=global_step) - - # Single thread; fairer comparison against eager - session_conf = tf.ConfigProto( - intra_op_parallelism_threads=1, inter_op_parallelism_threads=1) - - with tf.Session(config=session_conf) as sess: - sess.run(tf.global_variables_initializer()) - - # Warmup to reduce initialization effect when timing - samples = npr.normal(size=[hparams.n_samples, hparams.x_dim]) - for _ in range(hparams.n_warmup_iters): - _, _, _, _ = sess.run( - [x_out, loss, train_op, learning_rate], feed_dict={x: samples}) - - # Training - start_time = time.time() - for i in range(hparams.n_iters): - samples, loss_np, _, _ = sess.run( - [x_out, loss, train_op, learning_rate], feed_dict={x: samples}) - print("Iteration %d: loss %.4f" % (i, loss_np)) - wall_time = time.time() - start_time - examples_per_sec = hparams.n_samples / wall_time - - self.report_benchmark( - name="graph_train_%s" % ("gpu" - if 
tf.test.is_gpu_available() else "cpu"), - iters=hparams.n_iters, - extras={"examples_per_sec": examples_per_sec}, - wall_time=wall_time) + tf.enable_resource_variables() + for sample_size in [10, 25, 50, 100, 200]: + hparams.n_samples = sample_size + tf.reset_default_graph() + with tf.Graph().as_default(): + energy_fn, _, _ = l2hmc.get_scg_energy_fn() + x = tf.random_normal([hparams.n_samples, hparams.x_dim], + dtype=tf.float32) + dynamics = l2hmc.Dynamics( + x_dim=hparams.x_dim, + minus_loglikelihood_fn=energy_fn, + n_steps=hparams.n_steps, + eps=hparams.eps) + loss, _, _ = l2hmc.compute_loss(dynamics, x) + + optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) + train_op, loss, _ = graph_step(dynamics, optimizer, x) + + # Single thread; fairer comparison against eager + session_conf = tf.ConfigProto(inter_op_parallelism_threads=1) + + with tf.Session(config=session_conf) as sess: + sess.run(tf.global_variables_initializer()) + + # Warmup to reduce initialization effect when timing + for _ in range(hparams.n_warmup_iters): + _, _ = sess.run([train_op, loss]) + + # Training + start_time = time.time() + for i in range(hparams.n_iters): + _, loss_np = sess.run([train_op, loss]) + print("Iteration %d: loss %.4f" % (i, loss_np)) + wall_time = (time.time() - start_time) / hparams.n_iters + examples_per_sec = hparams.n_samples / wall_time + + self.report_benchmark( + name="graph_train_%s_%d" % + ("gpu" if tf.test.is_gpu_available() else "cpu", sample_size), + iters=hparams.n_iters, + extras={"examples_per_sec": examples_per_sec}, + wall_time=wall_time) def benchmark_eager(self): self._benchmark_eager() @@ -190,32 +198,44 @@ class L2hmcBenchmark(tf.test.Benchmark): """Benchmark Eager performance.""" hparams = get_default_hparams() - energy_fn, _, _ = l2hmc.get_scg_energy_fn() - dynamics = l2hmc.Dynamics( - x_dim=hparams.x_dim, - minus_loglikelihood_fn=energy_fn, - n_steps=hparams.n_steps, - eps=hparams.eps) - optimizer = 
tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) - step_fn = tfe.defun(step) if defun else step - - # Warmup to reduce initialization effect when timing - warmup(dynamics, optimizer, n_iters=hparams.n_warmup_iters, step_fn=step_fn) - - # Training - samples = tf.random_normal( - shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32) - start_time = time.time() - fit(dynamics, samples, optimizer, step_fn=step_fn, n_iters=hparams.n_iters) - wall_time = time.time() - start_time - examples_per_sec = hparams.n_samples / wall_time - - self.report_benchmark( - name="eager_train_%s%s" % ("gpu" if tf.test.is_gpu_available() else - "cpu", "_defun" if defun else ""), - iters=hparams.n_iters, - extras={"examples_per_sec": examples_per_sec}, - wall_time=wall_time) + for sample_size in [10, 25, 50, 100, 200]: + hparams.n_samples = sample_size + energy_fn, _, _ = l2hmc.get_scg_energy_fn() + dynamics = l2hmc.Dynamics( + x_dim=hparams.x_dim, + minus_loglikelihood_fn=energy_fn, + n_steps=hparams.n_steps, + eps=hparams.eps) + optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate) + step_fn = tfe.defun(step) if defun else step + + # Warmup to reduce initialization effect when timing + warmup( + dynamics, + optimizer, + n_iters=hparams.n_warmup_iters, + n_samples=hparams.n_samples, + step_fn=step_fn) + + # Training + samples = tf.random_normal( + shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32) + start_time = time.time() + fit(dynamics, + samples, + optimizer, + step_fn=step_fn, + n_iters=hparams.n_iters) + wall_time = (time.time() - start_time) / hparams.n_iters + examples_per_sec = hparams.n_samples / wall_time + + self.report_benchmark( + name="eager_train_%s%s_%d" % + ("gpu" if tf.test.is_gpu_available() else "cpu", + "_defun" if defun else "", sample_size), + iters=hparams.n_iters, + extras={"examples_per_sec": examples_per_sec}, + wall_time=wall_time) del dynamics -- GitLab From 334244be6864dd1dbec9bc8bb4996cc286a8e3e3 Mon Sep 17 00:00:00 
2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 10:31:36 -0700 Subject: [PATCH 082/570] Add tf.strings.unicode_script, which detects the script of a unicode codepoint based on standard ranges. PiperOrigin-RevId: 214796357 --- .../base_api/api_def_UnicodeScript.pbtxt | 28 ++++++ .../python_api/api_def_UnicodeScript.pbtxt | 6 ++ tensorflow/core/kernels/BUILD | 12 +++ tensorflow/core/kernels/unicode_script_op.cc | 53 +++++++++++ tensorflow/core/ops/string_ops.cc | 5 ++ tensorflow/python/kernel_tests/BUILD | 12 +++ .../kernel_tests/unicode_script_op_test.py | 57 ++++++++++++ .../api/golden/v1/tensorflow.strings.pbtxt | 4 + .../api/golden/v2/tensorflow.strings.pbtxt | 4 + tensorflow/tools/lib_package/BUILD | 2 + tensorflow/tools/pip_package/BUILD | 1 + tensorflow/workspace.bzl | 2 + third_party/icu/BUILD | 1 + third_party/icu/BUILD.bazel | 88 +++++++++++++++++++ third_party/icu/workspace.bzl | 15 ++++ 15 files changed, 290 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_UnicodeScript.pbtxt create mode 100644 tensorflow/core/kernels/unicode_script_op.cc create mode 100644 tensorflow/python/kernel_tests/unicode_script_op_test.py create mode 100644 third_party/icu/BUILD create mode 100644 third_party/icu/BUILD.bazel create mode 100644 third_party/icu/workspace.bzl diff --git a/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt new file mode 100644 index 0000000000..7898fe8d6b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UnicodeScript.pbtxt @@ -0,0 +1,28 @@ +op { + graph_op_name: "UnicodeScript" + endpoint { + name: "UnicodeScript" + } + in_arg { + name: "input" + description: <input("input", &input_tensor)); + const auto& input_flat = input_tensor->flat(); + + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output("output", 
input_tensor->shape(), + &output_tensor)); + auto output_flat = output_tensor->flat(); + + icu::ErrorCode status; + for (int i = 0; i < input_flat.size(); i++) { + UScriptCode script_code = uscript_getScript(input_flat(i), status); + if (status.isSuccess()) { + output_flat(i) = script_code; + } else { + output_flat(i) = -1; + status.reset(); + } + } + } +}; + +REGISTER_KERNEL_BUILDER(Name("UnicodeScript").Device(DEVICE_CPU), + UnicodeScriptOp); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index da1d2a6432..b4fbde54d9 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -244,4 +244,9 @@ REGISTER_OP("Substr") return shape_inference::BroadcastBinaryOpShapeFn(c); }); +REGISTER_OP("UnicodeScript") + .Input("input: int32") + .Output("output: int32") + .SetShapeFn(shape_inference::UnchangedShape); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5183e4d30c..c2e36e5e19 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1097,6 +1097,18 @@ tf_py_test( ], ) +tf_py_test( + name = "unicode_script_op_test", + size = "small", + srcs = ["unicode_script_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + cuda_py_test( name = "topk_op_test", size = "small", diff --git a/tensorflow/python/kernel_tests/unicode_script_op_test.py b/tensorflow/python/kernel_tests/unicode_script_op_test.py new file mode 100644 index 0000000000..927e5459ed --- /dev/null +++ b/tensorflow/python/kernel_tests/unicode_script_op_test.py @@ -0,0 +1,57 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#=============================================================================== +"""Functional tests for UnicodeScript op.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class UnicodeScriptOpTest(test.TestCase): + + def testValidScripts(self): + inputs = [ + ord("a"), + 0x0411, # CYRILLIC CAPITAL LETTER BE + 0x82b8, # CJK UNIFIED IDEOGRAPH-82B8 + ord(",") + ] + with self.cached_session(): + input_vector = constant_op.constant(inputs, dtypes.int32) + outputs = string_ops.unicode_script(input_vector).eval() + self.assertAllEqual( + outputs, + [ + 25, # USCRIPT_LATIN (LATN) + 8, # USCRIPT_CYRILLIC (CYRL) + 17, # USCRIPT_HAN (HANI) + 0 # USCRIPT_COMMON (ZYYY) + ]) + + def testInvalidScript(self): + inputs = [-100, 0xffffff] + with self.cached_session(): + input_vector = constant_op.constant(inputs, dtypes.int32) + outputs = string_ops.unicode_script(input_vector).eval() + self.assertAllEqual(outputs, [-1, -1]) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt index c52581dec1..312e94b41d 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.strings.pbtxt @@ -48,4 +48,8 @@ tf_module { name: "to_number" argspec: 
"args=[\'string_tensor\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], " } + member_method { + name: "unicode_script" + argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt index c52581dec1..312e94b41d 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.strings.pbtxt @@ -48,4 +48,8 @@ tf_module { name: "to_number" argspec: "args=[\'string_tensor\', \'out_type\', \'name\'], varargs=None, keywords=None, defaults=[\"\", \'None\'], " } + member_method { + name: "unicode_script" + argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } } diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index b450bc42c5..095ac1f4cc 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -125,6 +125,7 @@ genrule( "@gemmlowp//:LICENSE", "@gif_archive//:COPYING", "@highwayhash//:LICENSE", + "@icu//:icu4c/LICENSE", "@jpeg//:LICENSE.md", "@llvm//:LICENSE.TXT", "@lmdb//:LICENSE", @@ -192,6 +193,7 @@ genrule( "@gemmlowp//:LICENSE", "@gif_archive//:COPYING", "@highwayhash//:LICENSE", + "@icu//:icu4j/main/shared/licenses/LICENSE", "@jpeg//:LICENSE.md", "@llvm//:LICENSE.TXT", "@lmdb//:LICENSE", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 9d816f0672..cce60ccea0 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -153,6 +153,7 @@ filegroup( "@gemmlowp//:LICENSE", "@gif_archive//:COPYING", "@highwayhash//:LICENSE", + "@icu//:icu4c/LICENSE", "@jpeg//:LICENSE.md", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4bf2ff3fb5..e5a0a0b2b7 100755 --- 
a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -21,9 +21,11 @@ load( "def_file_filter_configure", ) load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") +load("//third_party/icu:workspace.bzl", icu = "repo") def initialize_third_party(): flatbuffers() + icu() # Sanitize a dependency so that it works correctly from code that includes # TensorFlow as a submodule. diff --git a/third_party/icu/BUILD b/third_party/icu/BUILD new file mode 100644 index 0000000000..82bab3ffd9 --- /dev/null +++ b/third_party/icu/BUILD @@ -0,0 +1 @@ +# This empty BUILD file is required to make Bazel treat this directory as a package. diff --git a/third_party/icu/BUILD.bazel b/third_party/icu/BUILD.bazel new file mode 100644 index 0000000000..36d6b9006b --- /dev/null +++ b/third_party/icu/BUILD.bazel @@ -0,0 +1,88 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files([ + "icu4c/LICENSE", + "icu4j/main/shared/licenses/LICENSE", +]) + +cc_library( + name = "headers", + hdrs = glob(["icu4c/source/common/unicode/*.h"]), + includes = [ + "icu4c/source/common", + ], + deps = [ + ], +) + +cc_library( + name = "common", + hdrs = glob(["icu4c/source/common/unicode/*.h"]), + includes = [ + "icu4c/source/common", + ], + deps = [ + ":icuuc", + ], +) + +cc_library( + name = "icuuc", + srcs = glob( + [ + "icu4c/source/common/*.c", + "icu4c/source/common/*.cpp", + "icu4c/source/stubdata/*.cpp", + ], + ), + hdrs = glob([ + "icu4c/source/common/*.h", + ]), + copts = [ + "-DU_COMMON_IMPLEMENTATION", + "-DU_HAVE_STD_ATOMICS", + ] + select({ + ":android": [ + "-fdata-sections", + "-DGOOGLE_VENDOR_SRC_BRANCH", + "-DU_HAVE_NL_LANGINFO_CODESET=0", + "-Wno-deprecated-declarations", + ], + ":apple": [ + "-DGOOGLE_VENDOR_SRC_BRANCH", + "-Wno-shorten-64-to-32", + "-Wno-unused-variable", + ], + ":windows": [ + "/utf-8", + "/DLOCALE_ALLOW_NEUTRAL_NAMES=0", + ], + "//conditions:default": [], + }), + tags = ["requires-rtti"], + 
visibility = [ + "//visibility:private", + ], + deps = [ + ":headers", + ], +) + +config_setting( + name = "android", + values = {"crosstool_top": "//external:android/crosstool"}, +) + +config_setting( + name = "apple", + values = {"cpu": "darwin"}, +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows"}, +) diff --git a/third_party/icu/workspace.bzl b/third_party/icu/workspace.bzl new file mode 100644 index 0000000000..bfebf4219b --- /dev/null +++ b/third_party/icu/workspace.bzl @@ -0,0 +1,15 @@ +"""Loads a lightweight subset of the ICU library for Unicode processing.""" + +load("//third_party:repo.bzl", "third_party_http_archive") + +def repo(): + third_party_http_archive( + name = "icu", + strip_prefix = "icu-release-62-1", + sha256 = "e15ffd84606323cbad5515bf9ecdf8061cc3bf80fb883b9e6aa162e485aa9761", + urls = [ + "https://mirror.bazel.build/github.com/unicode-org/icu/archive/release-62-1.tar.gz", + "https://github.com/unicode-org/icu/archive/release-62-1.tar.gz", + ], + build_file = "//third_party/icu:BUILD.bazel", + ) -- GitLab From 6d41787c32483b28f8c93973f28d4d078ea0b37e Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Thu, 27 Sep 2018 10:53:36 -0700 Subject: [PATCH 083/570] Add opaque field to custom call. The intent of this field is to enable more information to be encoded in the custom call and passed through to the backend. 
PiperOrigin-RevId: 214800539 --- tensorflow/compiler/xla/client/xla_builder.cc | 8 ++++--- tensorflow/compiler/xla/client/xla_builder.h | 24 +++++++++++-------- tensorflow/compiler/xla/service/hlo.proto | 8 +++++-- .../compiler/xla/service/hlo_instruction.cc | 9 +++---- .../compiler/xla/service/hlo_instruction.h | 5 ++-- .../compiler/xla/service/hlo_instructions.cc | 14 ++++++++--- .../compiler/xla/service/hlo_instructions.h | 8 +++++-- tensorflow/compiler/xla/service/hlo_parser.cc | 7 ++++-- .../compiler/xla/service/hlo_parser_test.cc | 12 ++++++++++ 9 files changed, 67 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/client/xla_builder.cc b/tensorflow/compiler/xla/client/xla_builder.cc index 95ff6432a5..5277de6a85 100644 --- a/tensorflow/compiler/xla/client/xla_builder.cc +++ b/tensorflow/compiler/xla/client/xla_builder.cc @@ -1278,7 +1278,7 @@ XlaOp XlaBuilder::AfterAll(absl::Span tokens) { XlaOp XlaBuilder::CustomCall(const string& call_target_name, absl::Span operands, - const Shape& shape) { + const Shape& shape, const string& opaque) { return ReportErrorOrReturn([&]() -> StatusOr { HloInstructionProto instr; if (absl::StartsWith(call_target_name, "$")) { @@ -1289,6 +1289,7 @@ XlaOp XlaBuilder::CustomCall(const string& call_target_name, } *instr.mutable_shape() = shape; instr.set_custom_call_target(call_target_name); + instr.set_custom_call_opaque(opaque); return AddInstruction(std::move(instr), HloOpcode::kCustomCall, operands); }); } @@ -2681,8 +2682,9 @@ XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, } XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape) { - return builder->CustomCall(call_target_name, operands, shape); + absl::Span operands, const Shape& shape, + const string& opaque) { + return builder->CustomCall(call_target_name, operands, shape, opaque); } XlaOp Complex(const XlaOp& real, const XlaOp& imag, diff --git 
a/tensorflow/compiler/xla/client/xla_builder.h b/tensorflow/compiler/xla/client/xla_builder.h index d0c59fa6f2..1da6ddd318 100644 --- a/tensorflow/compiler/xla/client/xla_builder.h +++ b/tensorflow/compiler/xla/client/xla_builder.h @@ -577,11 +577,9 @@ class XlaBuilder { absl::Span operands); // Enqueues a custom call instruction onto the computation. - // During code generation, a call instruction is emitted which targets a - // symbol with the name |call_target_name|. The |operands| are passed to the - // call instruction. |shape| is the resultant shape. XlaOp CustomCall(const string& call_target_name, - absl::Span operands, const Shape& shape); + absl::Span operands, const Shape& shape, + const string& opaque); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one @@ -1195,7 +1193,8 @@ class XlaBuilder { friend XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, absl::Span operands); friend XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape); + absl::Span operands, const Shape& shape, + const string& opaque); friend XlaOp Complex(const XlaOp& real, const XlaOp& imag, absl::Span broadcast_dimensions); friend XlaOp Conj(const XlaOp& operand); @@ -1717,12 +1716,17 @@ XlaOp OutfeedWithToken(const XlaOp& operand, const XlaOp& token, XlaOp Call(XlaBuilder* builder, const XlaComputation& computation, absl::Span operands); -// Enqueues a custom call instruction onto the computation. -// During code generation, a call instruction is emitted which targets a -// symbol with the name |call_target_name|. The |operands| are passed to the -// call instruction. |shape| is the resultant shape. +// Enqueues a custom call instruction onto the computation. A custom call +// invokes code external to XLA. 
The |operands| are passed to the external code, +// and the external code is expected to produce a result of the given +// |shape|. The exact mechanism is backend-specific. For example, in the CPU +// backend, a call instruction is emitted which targets a symbol with the name +// |call_target_name|. |call_target_name| and |opaque| can arbitrary strings, +// but |call_target_name| should be short as it may be used in labels. |opaque| +// can encode arbitrarily large amounts of information. XlaOp CustomCall(XlaBuilder* builder, const string& call_target_name, - absl::Span operands, const Shape& shape); + absl::Span operands, const Shape& shape, + const string& opaque = ""); // The following methods enqueue element-wise binary arithmetic operations // onto the computation. The shapes of the operands have to match unless one diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index b19ec12638..caaca16f71 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -34,7 +34,7 @@ import "tensorflow/compiler/xla/xla_data.proto"; option cc_enable_arenas = true; // Serialization of HloInstruction. -// Next ID: 53 +// Next ID: 54 message HloInstructionProto { reserved 10; reserved "parameter_name"; @@ -124,9 +124,13 @@ message HloInstructionProto { // The string representation of the infeed configuration. bytes infeed_config = 27; - // Name of a global symbol to call, only present for kCustomCall. + // Name of a external target (eg, global symbol) to call, only present for + // kCustomCall. string custom_call_target = 28; + // Opaque string, only present for kCustomCall. + string custom_call_opaque = 53; + // Shape of outfeed request. 
xla.Shape outfeed_shape = 29; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index f7ec854d80..23787dbc8a 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -379,7 +379,8 @@ StatusOr> HloInstruction::CreateFromProto( break; case HloOpcode::kCustomCall: instruction = CreateCustomCall(proto.shape(), all_operands(), - proto.custom_call_target()); + proto.custom_call_target(), + proto.custom_call_opaque()); if (proto.has_window()) { static_cast(instruction.get()) ->set_window(proto.window()); @@ -1108,9 +1109,9 @@ bool HloInstruction::HasSideEffect() const { /* static */ std::unique_ptr HloInstruction::CreateCustomCall( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target) { - return absl::make_unique(shape, operands, - custom_call_target); + absl::string_view custom_call_target, absl::string_view opaque) { + return absl::make_unique( + shape, operands, custom_call_target, opaque); } /* static */ std::unique_ptr HloInstruction::CreateTuple( diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index d615df0831..009bd3bab3 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -718,10 +718,11 @@ class HloInstruction { HloComputation* computation); // Creates a custom call instruction that applies the given custom call target - // to the given operands. "shape" is the resultant shape. + // to the given operands. "opaque" can be an arbitrary string with a + // backend-specific interpretation. "shape" is the resultant shape. static std::unique_ptr CreateCustomCall( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target); + absl::string_view custom_call_target, absl::string_view opaque = ""); // Creates a tuple instruction with the given elements. 
This is a convenience // wrapper around CreateVariadic. diff --git a/tensorflow/compiler/xla/service/hlo_instructions.cc b/tensorflow/compiler/xla/service/hlo_instructions.cc index e92882c22a..cd71bc3323 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.cc +++ b/tensorflow/compiler/xla/service/hlo_instructions.cc @@ -1830,9 +1830,10 @@ HloSelectAndScatterInstruction::CloneWithNewOperandsImpl( HloCustomCallInstruction::HloCustomCallInstruction( const Shape& shape, absl::Span operands, - absl::string_view custom_call_target) + absl::string_view custom_call_target, absl::string_view opaque) : HloInstruction(HloOpcode::kCustomCall, shape), custom_call_target_(custom_call_target.begin(), custom_call_target.end()), + opaque_(opaque.begin(), opaque.end()), feature_group_count_(1) { for (auto operand : operands) { AppendOperand(operand); @@ -1849,6 +1850,7 @@ HloInstructionProto HloCustomCallInstruction::ToProto() const { *convolution_dimension_numbers_; } proto.set_custom_call_target(custom_call_target_); + proto.set_custom_call_opaque(opaque_); proto.set_feature_group_count(feature_group_count_); return proto; } @@ -1872,6 +1874,11 @@ std::vector HloCustomCallInstruction::ExtraAttributesToStringImpl( // an HloComputation. extra.push_back( StrCat("custom_call_target=\"", CEscape(custom_call_target_), "\"")); + // If the opaque string becomes enormous we may want to reconsider printing + // this inline and consider other options. 
+ if (!opaque_.empty()) { + extra.push_back(StrCat("opaque=\"", CEscape(opaque_), "\"")); + } return extra; } @@ -1897,7 +1904,8 @@ bool HloCustomCallInstruction::IdenticalSlowPath( if (feature_group_count_ != casted_other.feature_group_count_) { return false; } - return custom_call_target_ == casted_other.custom_call_target_; + return custom_call_target_ == casted_other.custom_call_target_ && + opaque_ == casted_other.opaque_; } std::unique_ptr @@ -1905,7 +1913,7 @@ HloCustomCallInstruction::CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const { auto cloned = absl::make_unique( - shape, new_operands, custom_call_target()); + shape, new_operands, custom_call_target(), opaque()); if (window_ != nullptr) { cloned->set_window(*window_); } diff --git a/tensorflow/compiler/xla/service/hlo_instructions.h b/tensorflow/compiler/xla/service/hlo_instructions.h index 2d7bc83855..9c22f5db7e 100644 --- a/tensorflow/compiler/xla/service/hlo_instructions.h +++ b/tensorflow/compiler/xla/service/hlo_instructions.h @@ -1070,7 +1070,8 @@ class HloCustomCallInstruction : public HloInstruction { public: explicit HloCustomCallInstruction(const Shape& shape, absl::Span operands, - absl::string_view custom_call_target); + absl::string_view custom_call_target, + absl::string_view opaque); const Window& window() const override { CHECK(window_ != nullptr); return *window_; @@ -1090,6 +1091,7 @@ class HloCustomCallInstruction : public HloInstruction { convolution_dimension_numbers_ = absl::make_unique(dnums); } + const string& opaque() const { return opaque_; } const string& custom_call_target() const { return custom_call_target_; } void set_feature_group_count(int64 feature_group_count) { feature_group_count_ = feature_group_count; @@ -1109,8 +1111,10 @@ class HloCustomCallInstruction : public HloInstruction { std::unique_ptr CloneWithNewOperandsImpl( const Shape& shape, absl::Span new_operands, HloCloneContext* context) const override; - // 
Name of a global symbol to call, only present for kCustomCall. + // Name of a global symbol to call. string custom_call_target_; + // Opaque string interpreted by the backend. + string opaque_; // Describes the window in a windowed operation such as convolution. std::unique_ptr window_; // Describes the dimension numbers used for a convolution. diff --git a/tensorflow/compiler/xla/service/hlo_parser.cc b/tensorflow/compiler/xla/service/hlo_parser.cc index 37197b273b..25b70740e3 100644 --- a/tensorflow/compiler/xla/service/hlo_parser.cc +++ b/tensorflow/compiler/xla/service/hlo_parser.cc @@ -1266,11 +1266,13 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, } case HloOpcode::kCustomCall: { optional custom_call_target; + optional opaque; optional window; optional dnums; optional feature_group_count; attrs["custom_call_target"] = {/*required=*/true, AttrTy::kString, &custom_call_target}; + attrs["opaque"] = {/*required=*/false, AttrTy::kString, &opaque}; attrs["window"] = {/*required=*/false, AttrTy::kWindow, &window}; attrs["dim_labels"] = {/*required=*/false, AttrTy::kConvolutionDimensionNumbers, &dnums}; @@ -1279,8 +1281,9 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, if (!ParseOperands(&operands) || !ParseAttributes(attrs)) { return false; } - instruction = builder->AddInstruction(HloInstruction::CreateCustomCall( - shape, operands, *custom_call_target)); + instruction = builder->AddInstruction( + HloInstruction::CreateCustomCall(shape, operands, *custom_call_target, + opaque.has_value() ? 
*opaque : "")); if (window.has_value()) { instruction->set_window(*window); } diff --git a/tensorflow/compiler/xla/service/hlo_parser_test.cc b/tensorflow/compiler/xla/service/hlo_parser_test.cc index cca50fab54..96db96bdb9 100644 --- a/tensorflow/compiler/xla/service/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/service/hlo_parser_test.cc @@ -1002,6 +1002,18 @@ ENTRY CustomCall { ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar" } +)" +}, +// CustomCall with opaque value. +{ +"CustomCallWithOpaque", +R"(HloModule custom_call + +ENTRY CustomCall { + constant = f32[1]{0} constant({12345}) + ROOT custom-call = f32[1,2,3]{0,2,1} custom-call(constant), custom_call_target="foo\"bar", opaque="this string is opaque" +} + )" }, // Variables with non-default names -- GitLab From dcf72802384fdab6744d3c16577091a82bc2cce0 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 27 Sep 2018 11:01:56 -0700 Subject: [PATCH 084/570] Clean up unused members in DirectSession and Executor. 
PiperOrigin-RevId: 214802032 --- .../core/common_runtime/direct_session.cc | 8 ++++---- .../core/common_runtime/direct_session.h | 20 ++++++++----------- tensorflow/core/common_runtime/executor.h | 6 ------ 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index af5d5b17e7..841181f8c3 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -363,7 +363,7 @@ Status DirectSession::MaybeInitializeExecutionState( Status DirectSession::Create(const GraphDef& graph) { TF_RETURN_IF_ERROR(init_error_); if (graph.node_size() > 0) { - mutex_lock l(graph_def_lock_); + mutex_lock l(graph_state_lock_); if (graph_created_) { return errors::AlreadyExists( "A Graph has already been created for this session."); @@ -375,7 +375,7 @@ Status DirectSession::Create(const GraphDef& graph) { Status DirectSession::Extend(const GraphDef& graph) { TF_RETURN_IF_ERROR(CheckNotClosed()); - mutex_lock l(graph_def_lock_); + mutex_lock l(graph_state_lock_); return ExtendLocked(graph); } @@ -1172,7 +1172,7 @@ Status DirectSession::CreateExecutors( int graph_def_version; { - mutex_lock l(graph_def_lock_); + mutex_lock l(graph_state_lock_); graph_def_version = execution_state_->original_graph_def().versions().producer(); } @@ -1400,7 +1400,7 @@ Status DirectSession::CreateGraphs( std::unique_ptr* flib_def, RunStateArgs* run_state_args, DataTypeVector* input_types, DataTypeVector* output_types, int64* collective_graph_key) { - mutex_lock l(graph_def_lock_); + mutex_lock l(graph_state_lock_); std::unique_ptr client_graph; std::unique_ptr temp_exec_state_holder; diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index c2cf3c7fd7..4a6a921ea7 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -215,7 +215,7 
@@ class DirectSession : public Session { // if not already initialized. Status MaybeInitializeExecutionState(const GraphDef& graph, bool* out_already_initialized) - EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_); + EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_); // Retrieves an already existing set of executors to run 'inputs' and // 'outputs', or creates and caches them for future use. @@ -248,7 +248,7 @@ class DirectSession : public Session { RunMetadata* run_metadata); ::tensorflow::Status ExtendLocked(const GraphDef& graph) - EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_); + EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_); ::tensorflow::Status ResourceHandleToInputTensor( const Tensor& resource_tensor, Tensor* retrieved_tensor); @@ -289,7 +289,7 @@ class DirectSession : public Session { } ::tensorflow::Status CheckGraphCreated(const char* method) { - mutex_lock l(graph_def_lock_); + mutex_lock l(graph_state_lock_); if (!graph_created_) { return errors::InvalidArgument( "Session was not created with a graph before ", method, "!"); @@ -313,10 +313,8 @@ class DirectSession : public Session { DeviceSet device_set_; string session_handle_; - bool graph_created_ GUARDED_BY(graph_def_lock_) = false; - - mutex graph_def_lock_; - GraphDef graph_def_ GUARDED_BY(graph_def_lock_); + mutex graph_state_lock_; + bool graph_created_ GUARDED_BY(graph_state_lock_) = false; // The thread-pools to use for running ops, with a bool indicating if the pool // is owned. @@ -367,11 +365,11 @@ class DirectSession : public Session { // nodes can not be moved to a different device. Maps node names to // device names. std::unordered_map stateful_placements_ - GUARDED_BY(graph_def_lock_); + GUARDED_BY(graph_state_lock_); // Execution_state; used when placing the entire graph. std::unique_ptr execution_state_ - GUARDED_BY(graph_def_lock_); + GUARDED_BY(graph_state_lock_); // The function library, before any rewrites or optimizations have been // performed. 
In particular, CreateGraphs() may need to modify the function @@ -386,7 +384,7 @@ class DirectSession : public Session { std::atomic edge_name_counter_ = {0}; std::atomic handle_name_counter_ = {0}; - // For generating step ids that are unique across all sessions. + // For generating step ids that are unique across this sessions. static std::atomic_int_fast64_t step_id_counter_; // Global timeout for all blocking operations in this session. @@ -395,8 +393,6 @@ class DirectSession : public Session { // Manages all the cost models for the graphs executed in this session. CostModelManager cost_model_manager_; - Executor::Args::NodeOutputsCallback node_outputs_callback_ = nullptr; - // For testing collective graph key generation. mutex collective_graph_key_lock_; int64 collective_graph_key_ GUARDED_BY(collective_graph_key_lock_) = -1; diff --git a/tensorflow/core/common_runtime/executor.h b/tensorflow/core/common_runtime/executor.h index 6cd4fd22ea..34bf73972f 100644 --- a/tensorflow/core/common_runtime/executor.h +++ b/tensorflow/core/common_runtime/executor.h @@ -97,12 +97,6 @@ class Executor { typedef std::function Closure; typedef std::function Runner; Runner runner = nullptr; - - // A callback that is invoked each time a node has finished executing. 
- typedef std::function - NodeOutputsCallback; }; typedef std::function DoneCallback; virtual void RunAsync(const Args& args, DoneCallback done) = 0; -- GitLab From 50b94fa1d50a916eaf7a5a46d93260e9b0f93554 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 27 Sep 2018 11:07:09 -0700 Subject: [PATCH 085/570] Internal change PiperOrigin-RevId: 214803223 --- tensorflow/contrib/fused_conv/BUILD | 35 +- .../fused_conv2d_bias_activation_op.cc | 4 +- .../fused_conv2d_bias_activation_op_test.py | 891 +---------------- ...sed_conv2d_bias_activation_op_test_base.py | 945 ++++++++++++++++++ .../tools/pip_package/pip_smoke_test.py | 1 + 5 files changed, 985 insertions(+), 891 deletions(-) create mode 100644 tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py diff --git a/tensorflow/contrib/fused_conv/BUILD b/tensorflow/contrib/fused_conv/BUILD index 9725233e7f..490da9b33b 100644 --- a/tensorflow/contrib/fused_conv/BUILD +++ b/tensorflow/contrib/fused_conv/BUILD @@ -17,11 +17,14 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load( + "//tensorflow:tensorflow.bzl", + "tf_kernel_library", + "tf_custom_op_library", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", +) load("//tensorflow:tensorflow.bzl", "cuda_py_test") -load("//tensorflow:tensorflow.bzl", "tf_custom_op_library") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") tf_custom_op_py_library( @@ -109,12 +112,13 @@ tf_gen_op_wrapper_py( deps = [":fused_conv2d_bias_activation_op_op_lib"], ) -cuda_py_test( - name = "fused_conv2d_bias_activation_op_test", - srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"], - additional_deps = [ +py_library( + name = "fused_conv2d_bias_activation_op_test_base", + testonly = 1, + srcs = ["python/ops/fused_conv2d_bias_activation_op_test_base.py"], + 
visibility = ["//tensorflow/compiler/tf2xla:internal"], + deps = [ ":fused_conv_py", - "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client", "//tensorflow/python:client_testlib", @@ -127,8 +131,21 @@ cuda_py_test( "//tensorflow/python:random_ops", "//tensorflow/python:training", "//tensorflow/python:variables", + "//third_party/py/numpy", + "@absl_py//absl/testing:parameterized", + ], +) + +cuda_py_test( + name = "fused_conv2d_bias_activation_op_test", + size = "large", + srcs = ["python/ops/fused_conv2d_bias_activation_op_test.py"], + additional_deps = [ + ":fused_conv2d_bias_activation_op_test_base", + "//tensorflow/python:client_testlib", ], tags = [ + "no_pip", "requires-gpu-sm70", ], ) diff --git a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc index e9e6464d06..93b1aaa85e 100644 --- a/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc +++ b/tensorflow/contrib/fused_conv/kernels/fused_conv2d_bias_activation_op.cc @@ -111,8 +111,8 @@ class FusedConv2DBiasActivationOp : public OpKernel { context, (GetTensorDim(strides, data_format_, 'N') == 1 && GetTensorDim(strides, data_format_, 'C') == 1), - errors::InvalidArgument("Convolutional strides are not supported in " - "the batch or depth dimensions.")); + errors::Unimplemented("Convolutional strides are not supported in " + "the batch and depth dimensions.")); // Assuming qint8 <--> NCHW_VECT_C, OIHW_VECT_I (int8x4) here. 
constexpr bool is_int8x4 = std::is_same::value; diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py index 4894298694..e5c8a34fc1 100644 --- a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test.py @@ -12,896 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Functional tests for fused conv2d bias and activation operation.""" + +"""Tests for fused convolutions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - -from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops +from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op_test_base as test_base from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging - - -def GetShrunkInceptionShapes(shrink=10): - """Iterator for smaller versions of convolution shapes in 2015 Inception. - - Relative to inception, each depth value is `depth // shrink`. - - Args: - shrink: Factor to shrink each depth value by relative to Inception. - - Yields: - Tuple (input_size, filter_size, out_size, stride, padding), the convolution - parameters of Inception layers. 
- """ - input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [ - 4, 8, 8, 2048 - ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [ - 4, 8, 8, 1760 - ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [ - 4, 17, 17, 192 - ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [ - 4, 17, 17, 160 - ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024], - [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [ - 4, 17, 17, 768 - ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768], - [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [ - 4, 35, 35, 64 - ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [ - 4, 35, 35, 256 - ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [ - 4, 35, 35, 192 - ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]] - filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [ - 1, 1, 2048, 192 - ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384], - [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [ - 1, 1, 1760, 320 - ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [ - 3, 3, 128, 320 - ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [ - 1, 3, 192, 256 - ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [ - 3, 3, 192, 224 - ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [ - 3, 1, 192, 192 - ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [ - 1, 3, 128, 192 - ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [ - 3, 1, 128, 128 - ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [ - 1, 1, 768, 128 - ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [ - 3, 3, 64, 96 - ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 
64], - [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [ - 1, 1, 192, 64 - ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, - 64], [1, 1, 24, 64]] - out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [ - 4, 8, 8, 384 - ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [ - 4, 8, 8, 192 - ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [ - 4, 17, 17, 192 - ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [ - 4, 17, 17, 256 - ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [ - 4, 17, 17, 192 - ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [ - 4, 17, 17, 160 - ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [ - 4, 17, 17, 256 - ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [ - 4, 17, 17, 128 - ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [ - 4, 35, 35, 64 - ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96], - [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48], - [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]] - strides = [ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - ] - # Shrink sizes to make the test faster - for i in input_sizes: - i[3] //= shrink - for f in filter_sizes: - f[2] //= shrink - f[3] //= shrink - for o in out_sizes: - o[3] //= shrink - # pylint: disable=invalid-name - VALID = "VALID" - SAME = "SAME" - # pylint: enable=invalid-name - paddings = [ - SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME, - SAME, SAME, SAME, SAME, VALID, VALID, 
VALID - ] - for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides, - paddings): - yield i, f, o, s, p - - -def GetTestConfigs(): - """Get all the valid tests configs to run. - - Returns: - all the valid test configs as tuples of data_format and use_gpu. - """ - test_configs = [("NCHW", True), ("NHWC", True)] - return test_configs - - -class FusedConv2DBiasActivationTest(test.TestCase): - - def _DtypesToTest(self, use_gpu): - return [dtypes.float32] - - def _FilterFormatsToTest(self, use_gpu): - return ["HWIO", "OIHW"] - - def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias, - strides, padding, activation_mode, data_format, - filter_format, dtype): - """Verifies the output values of the convolution function. - - Args: - tensor_in_sizes: Input tensor dimensions in - [batch, input_rows, input_cols, input_depth]. - filter_in_sizes: Filter tensor dimensions in - [kernel_rows, kernel_cols, input_depth, output_depth]. - bias: 1-D bias tensor of length output_depth. - strides: Stride: [col_stride, row_stride] - padding: Padding type. - activation_mode: Activation mode. - data_format: Format of the data tensors. - filter_format: Filter format to use for the fused convolution. - dtype: Data type for inputs and outputs. - Returns: - Symbolic tensor value and reference value that can be used to - execute the computation and verify the results. - """ - input_size = np.prod(tensor_in_sizes) - filter_size = np.prod(filter_in_sizes) - bias_size = filter_in_sizes[-1] # equals to output depth - # Initializes the input tensor with array containing incrementing - # numbers from 1. - x1 = [f * 1.0 for f in range(1, input_size + 1)] - x2 = [f * 1.0 for f in range(1, filter_size + 1)] - # This is to guarantee that there is always negative values after - # bias add so that we can test whether relu works correctly. 
- x3 = bias - with self.test_session(use_gpu=True): - t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) - t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) - fused_t2 = t2 - if filter_format == "OIHW": - fused_t2 = HwioToOihw(t2) - t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype) - strides = [1] + strides + [1] - if data_format == "NCHW": - t1 = test_util.NHWCToNCHW(t1) - strides = test_util.NHWCToNCHW(strides) - output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - t1, - fused_t2, - t3, - strides=strides, - padding=padding, - data_format=data_format, - filter_format=filter_format, - activation_mode=activation_mode) - ref_conv_output = nn_ops.conv2d( - t1, t2, strides=strides, padding=padding, data_format=data_format) - ref_bias_output = nn_ops.bias_add( - ref_conv_output, t3, data_format=data_format) - ref_output = nn_ops.relu(ref_bias_output) - if data_format == "NCHW": - output = test_util.NCHWToNHWC(output) - ref_output = test_util.NCHWToNHWC(ref_output) - - return output, ref_output - - def _CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides, - padding): - """Verifies that CPU and GPU produce the same values. - - Args: - tensor_in_sizes: Input tensor dimensions in - [batch, input_rows, input_cols, input_depth]. - filter_in_sizes: Filter tensor dimensions in - [kernel_rows, kernel_cols, input_depth, output_depth]. - conv_strides: [row_stride, col_stride] for the convolution; - padding: Padding type. 
- """ - x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) - x2 = np.random.rand(*filter_in_sizes).astype(np.float32) - x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32) - - def _SetupVal(data_format, use_gpu): - with self.test_session(use_gpu=use_gpu): - t1 = constant_op.constant(x1, shape=tensor_in_sizes) - t2 = constant_op.constant(x2, shape=filter_in_sizes) - t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]]) - strides = [1] + conv_strides + [1] - if data_format == "NCHW": - t1 = test_util.NHWCToNCHW(t1) - strides = test_util.NHWCToNCHW(strides) - output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - t1, - t2, - t3, - strides=strides, - padding=padding, - data_format=data_format, - activation_mode="Relu") - - if data_format == "NCHW": - output = test_util.NCHWToNHWC(output) - return output - - tensors = [] - for (data_format, use_gpu) in GetTestConfigs(): - tensors.append(_SetupVal(data_format, use_gpu)) - with self.cached_session() as sess: - values = sess.run(tensors) - for i in range(1, len(values)): - self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3) - - def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides, - padding): - tensors = [] - ref_tensors = [] - for (data_format, use_gpu) in GetTestConfigs(): - for dtype in self._DtypesToTest(use_gpu): - for filter_format in self._FilterFormatsToTest(use_gpu): - result, expected = self._SetupValuesForDevice( - tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu", - data_format, filter_format, dtype) - tensors.append(result) - ref_tensors.append(expected) - with self.cached_session() as sess: - values = sess.run(tensors) - ref_values = sess.run(ref_tensors) - for i in range(len(tensors)): - conv = tensors[i] - value = values[i] - ref_value = ref_values[i] - tf_logging.info("expected = ", ref_value) - tf_logging.info("actual = ", value) - tol = 1e-5 - if value.dtype == np.float16: - tol = 1e-3 - self.assertAllClose( - 
np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol) - self.assertShapeEqual(value, conv) - - def testConv2D1x1Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D1x1Filter test.") - return - # expected_output = [ - # 0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0, - # 86.0, 43.0, 165.0, 131.0, 97.0 - # ] - medians = [-45.0, -130.0, -215.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[1, 1, 3, 3], - bias=medians, - strides=[1, 1], - padding="VALID") - - def testConv2DEmpty(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DEmpty test.") - return - # expected_output = [] - self._VerifyValues( - tensor_in_sizes=[0, 2, 3, 3], - filter_in_sizes=[1, 1, 3, 3], - bias=[0.0, 0.0, 0.0], - strides=[1, 1], - padding="VALID") - - def testConv2D2x2Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2Filter test.") - return - # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - bias=[-2500.0, -2500.0, -2500.0], - strides=[1, 1], - padding="VALID") - - def testConv2D1x2Filter(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D1x2Filter test.") - return - # expected_output = [ - # 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0 - # ] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[1, 2, 3, 3], - bias=[-500.0, -500.0, -500.0], - strides=[1, 1], - padding="VALID") - - def testConv2D2x2FilterStride2(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride2 test.") - return - # expected_output = [0.0, 67.0, 163.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - 
bias=[-2300.0, -2300.0, -2300.0], - strides=[2, 2], - padding="VALID") - - def testConv2D2x2FilterStride2Same(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.") - return - # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0] - self._VerifyValues( - tensor_in_sizes=[1, 2, 3, 3], - filter_in_sizes=[2, 2, 3, 3], - bias=[-2300.0, -1000.0, -1000.0], - strides=[2, 2], - padding="SAME") - - def testConv2D2x2FilterStride1x2(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.") - return - # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0] - self._VerifyValues( - tensor_in_sizes=[1, 3, 6, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-90.0], - strides=[1, 2], - padding="VALID") - - def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.") - return - # expected_output = [0, 0, 175, 205] - self._VerifyValues( - tensor_in_sizes=[1, 7, 7, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-100.0], - strides=[3, 3], - padding="VALID") - - def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.") - return - # expected = [0, 0, 2, 4] - self._VerifyValues( - tensor_in_sizes=[1, 3, 3, 1], - filter_in_sizes=[1, 1, 1, 1], - bias=[-5.0], - strides=[2, 2], - padding="SAME") - - # expected = [0, 0, 4, 6] - self._VerifyValues( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[1, 1, 1, 1], - bias=[-5.0], - strides=[2, 2], - padding="SAME") - - # expected = [4, 0, 1, 0] - self._VerifyValues( - tensor_in_sizes=[1, 4, 4, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-40.0], - strides=[3, 3], - padding="SAME") - - def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True): - if gpu_only 
and not test.is_gpu_available(): - tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.") - return - # expected = [0, 5] - self._VerifyValues( - tensor_in_sizes=[1, 2, 2, 1], - filter_in_sizes=[2, 2, 1, 2], - bias=[-50.0, -55.0], - strides=[1, 1], - padding="VALID") - - # expected = [0, 2, 282, 322] - self._VerifyValues( - tensor_in_sizes=[1, 8, 8, 1], - filter_in_sizes=[2, 2, 1, 1], - bias=[-200.0], - strides=[4, 4], - padding="SAME") - - def testShapeFunctionEdgeCases(self): - # All shapes unknown. - c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - self.assertEqual([None, None, None, None], c1.get_shape().as_list()) - - # Incorrect input shape. - with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[1, 3]), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - # Incorrect filter shape. - with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32, shape=[1, 3]), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - # Depth mismatch. 
- with self.assertRaises(ValueError): - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Relu") - - def testOpEdgeCases(self, gpu_only=True): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping OpEdgeCases tests.") - return - with self.cached_session() as sess: - # Illegal strides. - with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, - "Convolutional strides are not supported in " - "the batch or depth dimensions."): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[2, 1, 1, 1], - padding="SAME", - activation_mode="Relu")) - with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, - "Convolutional strides are not supported in " - "the batch or depth dimensions."): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 2], - padding="SAME", - activation_mode="Relu")) - - # Illegal activation mode. - with self.assertRaisesRegexp(ValueError, - "Op passed string 'Tanh' not in:"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - array_ops.placeholder(dtypes.float32), - strides=[1, 1, 1, 1], - padding="SAME", - activation_mode="Tanh")) - - # Filter larger than input. 
- with self.assertRaisesRegexp(ValueError, "Negative dimension size"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[20, 21, 3, 2]), - array_ops.placeholder(dtypes.float32, shape=[2]), - strides=[1, 1, 1, 1], - padding="VALID", - activation_mode="Relu")) - with self.assertRaisesRegexp(ValueError, "Negative dimension size"): - sess.run( - fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), - array_ops.placeholder(dtypes.float32, shape=[21, 20, 3, 2]), - array_ops.placeholder(dtypes.float32, shape=[2]), - strides=[1, 1, 1, 1], - padding="VALID", - activation_mode="Relu")) - - -def GetInceptionFwdTest(input_size, filter_size, stride, padding, - gpu_only=True): - - def Test(self): - if gpu_only and not test.is_gpu_available(): - tf_logging.info("Skipping InceptionFwd %s", (input_size, filter_size, - stride, padding)) - return - tf_logging.info("Testing InceptionFwd %s", (input_size, filter_size, stride, - padding)) - self._CompareFwdValues(input_size, filter_size, [stride, stride], padding) - - return Test - - -def CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): - """Calculates the size of an output dimension of a strided convolution. - - Given the sizes of the corresponding dimension of the input and filter shapes, - and the stride and padding_types, calculates the size of the output dimension. - This function can be called separately for each input dimension. - - Args: - input_dim: An `int` specifying the size of the input dimension. - filter_dim: An `int` specifying the size of the filter dimension. - stride: An `int` specifying the step size of the convolution along the - input dimension. - padding_type: either 'VALID' or 'SAME'. - - Returns: - The size of the output dimension. 
- """ - if padding_type == "VALID": - return (input_dim - filter_dim + stride) // stride - else: # padding_type == 'SAME' - return (input_dim + stride - 1) // stride - - -def NchwVectCToNchw(in_tensor): - # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W] - t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3]) - n = in_tensor.shape.dims[0].value - c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value - h = in_tensor.shape.dims[2].value - w = in_tensor.shape.dims[3].value - return array_ops.reshape(t, [n, c, h, w]) - - -def OihwVectIToHwio(in_tensor): - # [O, I / 4, H, W, 4] => [O, I / 4, 4, H, W] == [O, I, H, W] - t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0]) - o = in_tensor.shape.dims[0].value - i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value - h = in_tensor.shape.dims[2].value - w = in_tensor.shape.dims[3].value - return array_ops.reshape(t, [h, w, i, o]) - - -def NchwToNchwVectC(in_tensor): - n, c, h, w = in_tensor.shape.as_list() - assert c % 4 == 0 - t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w]) - return array_ops.transpose(t, [0, 1, 3, 4, 2]) - - -def HwioToOihw(in_tensor): - return array_ops.transpose(in_tensor, [3, 2, 0, 1]) - - -def SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel, - padding, strides, side_input_scale, - side_input, biases, apply_relu): - """Simulates the int8 fused 2-D convolution op using separate float ops. - - The arguments and return values have the same format, meanings and - restrictions as the actual op. - Args: - conv_input_scale: A scalar 'float'. - conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. - kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout. - padding: A `string` from: `"SAME", "VALID"`. - strides: A list of `ints`. - side_input_scale: A scalar 'float'. - side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. - biases: A `Tensor` of type `float32` in NCHW layout. 
- apply_relu: A boolean to specify whether to apply "Relu" activation function - that clips outputs to the range [0, 127], or "None" activation that clips - to the range [-128, 127]. - Returns: - A `Tensor` of type `qint8` in NCHW_VECT_C layout. - """ - conv_result = nn_ops.conv2d( - NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)), - OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)), - strides=strides, - padding=padding, - data_format="NCHW") * conv_input_scale - - conv_and_side_inputs = conv_result + side_input_scale * NchwVectCToNchw( - gen_array_ops.dequantize(side_input, -128, 127)) - - output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW") - if apply_relu: - output = nn_ops.relu(output) - - result, _, _ = gen_array_ops.quantize_v2( - NchwToNchwVectC(output), -128, 127, dtypes.qint8) - return result - - -class FusedConvInt8Tests(test.TestCase): - _test_params = [ - { - "batch_size": 1, - "input_channels": 4, - "output_channels": 4, - "input_height": 8, - "input_width": 8, - "filter_height": 6, - "filter_width": 6, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 1, - "input_channels": 4, - "output_channels": 4, - "input_height": 6, - "input_width": 6, - "filter_height": 6, - "filter_width": 6, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "VALID" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - 
"filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.0, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 2, - "input_channels": 8, - "output_channels": 16, - "input_height": 8, - "input_width": 8, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 2, - "horizontal_stride": 2, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "VALID" - }, - { - "batch_size": 2, - "input_channels": 16, - "output_channels": 16, - "input_height": 9, - "input_width": 9, - "filter_height": 3, - "filter_width": 3, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.001, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 5, - "filter_width": 5, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.001, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 7, - "filter_width": 1, - "vertical_stride": 2, - "horizontal_stride": 1, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - { - "batch_size": 3, - "input_channels": 8, - "output_channels": 8, - "input_height": 9, - "input_width": 9, - "filter_height": 1, - "filter_width": 7, - "vertical_stride": 1, - "horizontal_stride": 1, - "conv_input_scale": 0.002, - "side_input_scale": 0.5, - "bias_scale": 1, - "padding_type": "SAME" - }, - ] - - def runTest(self, test_param, apply_relu): - batch_size = test_param["batch_size"] - input_channels = test_param["input_channels"] - output_channels = test_param["output_channels"] - input_height = test_param["input_height"] - input_width = test_param["input_width"] - 
filter_height = test_param["filter_height"] - filter_width = test_param["filter_width"] - vertical_stride = test_param["vertical_stride"] - horizontal_stride = test_param["horizontal_stride"] - conv_input_scale = test_param["conv_input_scale"] - side_input_scale = test_param["side_input_scale"] - bias_scale = test_param["bias_scale"] - padding_type = test_param["padding_type"] - - conv_input, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [batch_size, input_channels // 4, input_height, input_width, 4], - minval=-0.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - kernel, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [ - output_channels, input_channels // 4, filter_height, - filter_width, 4 - ], - minval=-1.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - output_height = CalculateConvolvedOutputDim(input_height, filter_height, - vertical_stride, padding_type) - output_width = CalculateConvolvedOutputDim(input_width, filter_width, - horizontal_stride, padding_type) - tf_logging.info("output_height=", output_height, ", output_width=", - output_width) - - side_input, _, _ = gen_array_ops.quantize_v2( - random_ops.random_uniform( - [batch_size, output_channels // 4, output_height, output_width, 4], - minval=0.0, - maxval=1.0, - dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) - - biases = random_ops.random_uniform( - [output_channels], - minval=-10 * bias_scale, - maxval=20 * bias_scale, - dtype=dtypes.float32) - - strides = [1, 1, vertical_stride, horizontal_stride] - - actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( - conv_input, - kernel, - biases, - strides=strides, - padding=padding_type, - conv_input_scale=conv_input_scale, - side_input_scale=side_input_scale, - side_input=side_input, - activation_mode="Relu" if apply_relu else "None", - data_format="NCHW_VECT_C", - filter_format="OIHW_VECT_I") - expected = SimulateFusedConv2dBiasActivationInt8( - 
conv_input_scale, conv_input, kernel, padding_type, strides, - side_input_scale, side_input, biases, apply_relu) - with self.test_session(use_gpu=True) as sess: - actual_y, expected_y = sess.run([actual, expected]) - self.assertAllClose(actual_y, expected_y, rtol=0, atol=1) +# Instantiate the two test suites from test_base, mixing in test.TestCase as +# the test framework. +class FusedConv2DBiasActivationTest(test_base.FusedConv2DBiasActivationTest, + test.TestCase): + pass - def testFusedConvInt8(self): - if not test.is_gpu_available( - cuda_only=True, min_cuda_compute_capability=(6, 1)): - tf_logging.info("int8 test skipped because not run with --config=cuda or " - "no GPUs with compute capability >= 6.1 are available.") - return - for apply_relu in [True, False]: - for test_param in self._test_params: - self.runTest(test_param, apply_relu) +class FusedConvInt8Tests(test_base.FusedConvInt8Tests, test.TestCase): + pass -if __name__ == "__main__": - for index, (input_size_, filter_size_, output_size_, stride_, - padding_) in enumerate(GetShrunkInceptionShapes()): - setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index), - GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_)) - # TODO(b/35359731) - # Fwd, BckInput, and BackFilter to test that for certain input parameter - # set, winograd nonfused algorithm will be excluded from conv autotune. If - # in such case, winograd nonfused algorithm is added as one option of the - # conv autotune, and cuDNN version is smaller than 7, the following tests - # will fail. 
- ishape = [1, 400, 400, 1] - fshape = [1, 1, 1, 256] - oshape = [1, 400, 400, 256] - setattr(FusedConv2DBiasActivationTest, - "testInceptionFwd_No_Winograd_Nonfused", - GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)) +if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py new file mode 100644 index 0000000000..35fc65e4ba --- /dev/null +++ b/tensorflow/contrib/fused_conv/python/ops/fused_conv2d_bias_activation_op_test_base.py @@ -0,0 +1,945 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Provides test suites that can be run to test fused convolutions. + +Each of the two test suites in this module, FusedConv2DBiasActivationTest and +FusedConvInt8Tests, should be "instantiated" by declaring a class which inherits +from the FusedConv test and a class that provides the standard test.TestCase +API. + +See e.g. fused_conv2d_bias_activation_op_test.py in this folder. 
+""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import contextlib +import numpy as np + +from tensorflow.contrib.fused_conv.python.ops import fused_conv2d_bias_activation_op +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops +from tensorflow.python.platform import test +from tensorflow.python.platform import tf_logging + + +def _GetShrunkInceptionShapes(shrink=10): + """Iterator for smaller versions of convolution shapes in 2015 Inception. + + Relative to inception, each depth value is `depth // shrink`. + + Args: + shrink: Factor to shrink each depth value by relative to Inception. + + Yields: + Tuple (input_size, filter_size, out_size, stride, padding), the convolution + parameters of Inception layers. 
+ """ + input_sizes = [[4, 5, 5, 1248], [4, 8, 8, 384], [4, 8, 8, 384], [ + 4, 8, 8, 2048 + ], [4, 8, 8, 448], [4, 8, 8, 2048], [4, 8, 8, 2048], [4, 8, 8, 2048], [ + 4, 8, 8, 1760 + ], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 8, 8, 1760], [4, 17, 17, 192], [ + 4, 17, 17, 192 + ], [4, 17, 17, 1248], [4, 17, 17, 128], [4, 17, 17, 1248], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 1216], [4, 17, 17, 1216], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 1152], [4, 17, 17, 1152], [4, 17, 17, 192], [ + 4, 17, 17, 160 + ], [4, 17, 17, 1152], [4, 17, 17, 1024], [4, 17, 17, 128], [4, 17, 17, 1024], + [4, 17, 17, 128], [4, 17, 17, 1024], [4, 17, 17, 128], [ + 4, 17, 17, 768 + ], [4, 17, 17, 128], [4, 17, 17, 128], [4, 17, 17, 768], + [4, 17, 17, 768], [4, 35, 35, 96], [4, 35, 35, 288], [ + 4, 35, 35, 64 + ], [4, 35, 35, 288], [4, 35, 35, 256], [4, 35, 35, 48], [ + 4, 35, 35, 256 + ], [4, 35, 35, 96], [4, 35, 35, 192], [4, 35, 35, 192], [ + 4, 35, 35, 192 + ], [4, 73, 73, 64], [4, 73, 73, 64], [4, 147, 147, 24]] + filter_sizes = [[1, 1, 1248, 128], [1, 3, 384, 384], [3, 1, 384, 384], [ + 1, 1, 2048, 192 + ], [3, 3, 448, 384], [1, 1, 2048, 320], [1, 1, 2048, 448], [1, 1, 2048, 384], + [1, 1, 1760, 384], [1, 1, 1760, 192], [1, 1, 1760, 448], [ + 1, 1, 1760, 320 + ], [3, 3, 192, 192], [3, 3, 192, 192], [1, 1, 1248, 192], [ + 3, 3, 128, 320 + ], [1, 1, 1248, 128], [1, 3, 224, 224], [3, 1, 192, 256], [ + 1, 3, 192, 256 + ], [1, 1, 1216, 192], [1, 1, 1216, 96], [3, 1, 224, 224], [ + 3, 3, 192, 224 + ], [1, 3, 192, 192], [1, 1, 1152, 192], [1, 1, 1152, 128], [ + 3, 1, 192, 192 + ], [3, 3, 160, 192], [1, 1, 1152, 160], [1, 1, 1024, 128], [ + 1, 3, 128, 192 + ], [1, 1, 1024, 160], [3, 1, 128, 192], [1, 1, 1024, 256], [ + 3, 1, 128, 128 + ], [1, 1, 768, 192], [1, 3, 128, 128], [3, 3, 128, 128], [ + 1, 1, 768, 128 + ], [1, 1, 768, 320], [3, 3, 96, 96], [3, 3, 288, 384], [ + 3, 3, 64, 96 + ], [1, 1, 288, 64], [1, 1, 256, 64], [5, 5, 48, 
64], + [1, 1, 256, 48], [3, 3, 96, 96], [1, 1, 192, 32], [ + 1, 1, 192, 64 + ], [1, 1, 192, 48], [3, 3, 64, 192], [1, 1, 64, + 64], [1, 1, 24, 64]] + out_sizes = [[4, 5, 5, 128], [4, 8, 8, 384], [4, 8, 8, 384], [4, 8, 8, 192], [ + 4, 8, 8, 384 + ], [4, 8, 8, 320], [4, 8, 8, 448], [4, 8, 8, 384], [4, 8, 8, 384], [ + 4, 8, 8, 192 + ], [4, 8, 8, 448], [4, 8, 8, 320], [4, 8, 8, 192], [4, 17, 17, 192], [ + 4, 17, 17, 192 + ], [4, 8, 8, 320], [4, 17, 17, 128], [4, 17, 17, 224], [4, 17, 17, 256], [ + 4, 17, 17, 256 + ], [4, 17, 17, 192], [4, 17, 17, 96], [4, 17, 17, 224], [4, 17, 17, 224], [ + 4, 17, 17, 192 + ], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 192], [ + 4, 17, 17, 160 + ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 160], [4, 17, 17, 192], [ + 4, 17, 17, 256 + ], [4, 17, 17, 128], [4, 17, 17, 192], [4, 17, 17, 128], [4, 17, 17, 128], [ + 4, 17, 17, 128 + ], [4, 17, 17, 320], [4, 17, 17, 96], [4, 17, 17, 384], [4, 35, 35, 96], [ + 4, 35, 35, 64 + ], [4, 35, 35, 64], [4, 35, 35, 64], [4, 35, 35, 48], [4, 35, 35, 96], + [4, 35, 35, 32], [4, 35, 35, 64], [4, 35, 35, 48], + [4, 71, 71, 192], [4, 73, 73, 64], [4, 147, 147, 64]] + strides = [ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + ] + # Shrink sizes to make the test faster + for i in input_sizes: + i[3] //= shrink + for f in filter_sizes: + f[2] //= shrink + f[3] //= shrink + for o in out_sizes: + o[3] //= shrink + # pylint: disable=invalid-name + VALID = "VALID" + SAME = "SAME" + # pylint: enable=invalid-name + paddings = [ + SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + VALID, SAME, SAME, VALID, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, SAME, VALID, VALID, SAME, SAME, SAME, SAME, SAME, + SAME, SAME, SAME, SAME, VALID, VALID, 
VALID + ] + for i, f, o, s, p in zip(input_sizes, filter_sizes, out_sizes, strides, + paddings): + yield i, f, o, s, p + + +def _GetTestConfigs(): + """Get all the valid tests configs to run. + + Returns: + all the valid test configs as tuples of data_format and use_gpu. + """ + test_configs = [("NCHW", True), ("NHWC", True)] + return test_configs + + +def _IotaNdF32Constant(dim_sizes): + + def MakeList(dims): + if len(dims) == 1: + return [float(1 + f) for f in range(dims[0])] + return [MakeList(dims[1:]) for _ in range(dims[0])] + + return constant_op.constant(MakeList(dim_sizes), dtype=dtypes.float32) + + +def _GetInceptionFwdTest(input_size, + filter_size, + stride, + padding, + gpu_only=True): + + def Test(self): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping InceptionFwd %s", + (input_size, filter_size, stride, padding)) + return + tf_logging.info("Testing InceptionFwd %s", + (input_size, filter_size, stride, padding)) + self.CompareFwdValues(input_size, filter_size, [stride, stride], padding) + + return Test + + +class FusedConv2DBiasActivationTest(object): + + @contextlib.contextmanager + def test_scope(self): # pylint: disable=invalid-name + """Can be overridden in base classes to provide a test scope.""" + yield + + def _DtypesToTest(self, use_gpu): + return [dtypes.float32] + + def _FilterFormatsToTest(self, use_gpu): + return ["HWIO", "OIHW"] + + def _SetupValuesForDevice(self, tensor_in_sizes, filter_in_sizes, bias, + strides, padding, activation_mode, data_format, + filter_format, dtype): + """Verifies the output values of the convolution function. + + Args: + tensor_in_sizes: Input tensor dimensions in + [batch, input_rows, input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in + [kernel_rows, kernel_cols, input_depth, output_depth]. + bias: 1-D bias tensor of length output_depth. + strides: Stride: [row_stride, col_stride] + padding: Padding type. + activation_mode: Activation mode. 
+ data_format: Format of the data tensors. + filter_format: Filter format to use for the fused convolution. + dtype: Data type for inputs and outputs. + Returns: + Symbolic tensor value and reference value that can be used to + execute the computation and verify the results. + """ + input_size = np.prod(tensor_in_sizes) + filter_size = np.prod(filter_in_sizes) + bias_size = filter_in_sizes[-1] # equal to output depth + # Initializes the input tensor with array containing incrementing + # numbers from 1. + x1 = [f * 1.0 for f in range(1, input_size + 1)] + x2 = [f * 1.0 for f in range(1, filter_size + 1)] + # This is to guarantee that there are always negative values after + # bias add so that we can test whether relu works correctly. + x3 = bias + with self.cached_session(use_gpu=True), self.test_scope(): + t1 = constant_op.constant(x1, shape=tensor_in_sizes, dtype=dtype) + t2 = constant_op.constant(x2, shape=filter_in_sizes, dtype=dtype) + fused_t2 = t2 + if filter_format == "OIHW": + fused_t2 = _HwioToOihw(t2) + t3 = constant_op.constant(x3, shape=[bias_size], dtype=dtype) + strides = [1] + strides + [1] + if data_format == "NCHW": + t1 = test_util.NHWCToNCHW(t1) + strides = test_util.NHWCToNCHW(strides) + output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + t1, + fused_t2, + t3, + strides=strides, + padding=padding, + data_format=data_format, + filter_format=filter_format, + activation_mode=activation_mode) + ref_conv_output = nn_ops.conv2d( + t1, t2, strides=strides, padding=padding, data_format=data_format) + ref_bias_output = nn_ops.bias_add( + ref_conv_output, t3, data_format=data_format) + ref_output = nn_ops.relu(ref_bias_output) + if data_format == "NCHW": + output = test_util.NCHWToNHWC(output) + ref_output = test_util.NCHWToNHWC(ref_output) + + return output, ref_output + + def CompareFwdValues(self, tensor_in_sizes, filter_in_sizes, conv_strides, + padding): + """Verifies that NCHW and NHWC data formats produce the same values. 
+ + Args: + tensor_in_sizes: Input tensor dimensions in + [batch, input_rows, input_cols, input_depth]. + filter_in_sizes: Filter tensor dimensions in + [kernel_rows, kernel_cols, input_depth, output_depth]. + conv_strides: [row_stride, col_stride] for the convolution; + padding: Padding type. + """ + x1 = np.random.rand(*tensor_in_sizes).astype(np.float32) + x2 = np.random.rand(*filter_in_sizes).astype(np.float32) + x3 = np.random.rand(*[filter_in_sizes[-1]]).astype(np.float32) + + def _SetupVal(data_format, use_gpu): + with self.cached_session(use_gpu=use_gpu), self.test_scope(): + t1 = constant_op.constant(x1, shape=tensor_in_sizes) + t2 = constant_op.constant(x2, shape=filter_in_sizes) + t3 = constant_op.constant(x3, shape=[filter_in_sizes[-1]]) + strides = [1] + conv_strides + [1] + if data_format == "NCHW": + t1 = test_util.NHWCToNCHW(t1) + strides = test_util.NHWCToNCHW(strides) + output = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + t1, + t2, + t3, + strides=strides, + padding=padding, + data_format=data_format, + activation_mode="Relu") + + if data_format == "NCHW": + output = test_util.NCHWToNHWC(output) + return output + + tensors = [] + for (data_format, use_gpu) in _GetTestConfigs(): + tensors.append(_SetupVal(data_format, use_gpu)) + with self.cached_session() as sess, self.test_scope(): + values = sess.run(tensors) + for i in range(1, len(values)): + self.assertAllClose(values[0], values[i], rtol=1e-3, atol=1e-3) + + def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, bias, strides, + padding): + tensors = [] + ref_tensors = [] + for (data_format, use_gpu) in _GetTestConfigs(): + for dtype in self._DtypesToTest(use_gpu): + for filter_format in self._FilterFormatsToTest(use_gpu): + result, expected = self._SetupValuesForDevice( + tensor_in_sizes, filter_in_sizes, bias, strides, padding, "Relu", + data_format, filter_format, dtype) + tensors.append(result) + ref_tensors.append(expected) + with self.cached_session() as sess, 
self.test_scope(): + values = sess.run(tensors) + ref_values = sess.run(ref_tensors) + for i in range(len(tensors)): + conv = tensors[i] + value = values[i] + ref_value = ref_values[i] + tf_logging.info("expected = %s", ref_value) + tf_logging.info("actual = %s", value) + tol = 1e-5 + if value.dtype == np.float16: + tol = 1e-3 + self.assertAllClose( + np.ravel(ref_value), np.ravel(value), atol=tol, rtol=tol) + self.assertShapeEqual(value, conv) + + def testConv2D1x1Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D1x1Filter test.") + return + # expected_output = [ + # 0.0, 0.0, 0.0, 21.0, 0.0, 0.0, 57.0, 0.0, 0.0, 93.0, 41.0, 0.0, 129.0, + # 86.0, 43.0, 165.0, 131.0, 97.0 + # ] + medians = [-45.0, -130.0, -215.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + bias=medians, + strides=[1, 1], + padding="VALID") + + def testConv2DEmpty(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DEmpty test.") + return + # expected_output = [] + self._VerifyValues( + tensor_in_sizes=[0, 2, 3, 3], + filter_in_sizes=[1, 1, 3, 3], + bias=[0.0, 0.0, 0.0], + strides=[1, 1], + padding="VALID") + + def testConv2D2x2Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2Filter test.") + return + # expected_output = [0.0, 0.0, 0.0, 401.0, 533.0, 665.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2500.0, -2500.0, -2500.0], + strides=[1, 1], + padding="VALID") + + def testConv2D1x2Filter(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D1x2Filter test.") + return + # expected_output = [ + # 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 190.0, 265.0, 340.0, 343.0, 436.0, 529.0 + # ] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[1, 2, 3, 3], + bias=[-500.0, -500.0, 
-500.0], + strides=[1, 1], + padding="VALID") + + def testConv2D2x2FilterStride2(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride2 test.") + return + # expected_output = [0.0, 67.0, 163.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2300.0, -2300.0, -2300.0], + strides=[2, 2], + padding="VALID") + + def testConv2D2x2FilterStride2Same(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride2Same test.") + return + # expected_output = [0.0, 2367.0, 2463.0, 1230.0, 1305.0, 1380.0] + self._VerifyValues( + tensor_in_sizes=[1, 2, 3, 3], + filter_in_sizes=[2, 2, 3, 3], + bias=[-2300.0, -1000.0, -1000.0], + strides=[2, 2], + padding="SAME") + + def testConv2D2x2FilterStride1x2(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2D2x2FilterStride1x2 test.") + return + # expected_output = [0.0, 0.0, 8.0, 28.0, 48.0, 68.0] + self._VerifyValues( + tensor_in_sizes=[1, 3, 6, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-90.0], + strides=[1, 2], + padding="VALID") + + def testConv2DKernelSmallerThanStrideValid(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSmallerThanStrideValid test.") + return + # expected_output = [0, 0, 175, 205] + self._VerifyValues( + tensor_in_sizes=[1, 7, 7, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-100.0], + strides=[3, 3], + padding="VALID") + + def testConv2DKernelSmallerThanStrideSame(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSmallerThanStrideSame test.") + return + # expected = [0, 0, 2, 4] + self._VerifyValues( + tensor_in_sizes=[1, 3, 3, 1], + filter_in_sizes=[1, 1, 1, 1], + bias=[-5.0], + strides=[2, 2], + padding="SAME") + + # expected = [0, 0, 4, 6] + self._VerifyValues( + 
tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[1, 1, 1, 1], + bias=[-5.0], + strides=[2, 2], + padding="SAME") + + # expected = [4, 0, 1, 0] + self._VerifyValues( + tensor_in_sizes=[1, 4, 4, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-40.0], + strides=[3, 3], + padding="SAME") + + def testConv2DKernelSizeMatchesInputSize(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping Conv2DKernelSizeMatchesInputSize test.") + return + # expected = [0, 5] + self._VerifyValues( + tensor_in_sizes=[1, 2, 2, 1], + filter_in_sizes=[2, 2, 1, 2], + bias=[-50.0, -55.0], + strides=[1, 1], + padding="VALID") + + # expected = [0, 2, 282, 322] + self._VerifyValues( + tensor_in_sizes=[1, 8, 8, 1], + filter_in_sizes=[2, 2, 1, 1], + bias=[-200.0], + strides=[4, 4], + padding="SAME") + + def testShapeFunctionEdgeCases(self): + # All shapes unknown. + c1 = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + self.assertEqual([None, None, None, None], c1.get_shape().as_list()) + + # Incorrect input shape. + with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32, shape=[1, 3]), + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + # Incorrect filter shape. + with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32), + array_ops.placeholder(dtypes.float32, shape=[1, 3]), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + # Depth mismatch. 
+ with self.assertRaises(ValueError): + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + array_ops.placeholder(dtypes.float32, shape=[32, 20, 20, 3]), + array_ops.placeholder(dtypes.float32, shape=[4, 4, 2, 2]), + array_ops.placeholder(dtypes.float32), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Relu") + + def testOpEdgeCases(self, gpu_only=True): + if gpu_only and not test.is_gpu_available(): + tf_logging.info("Skipping OpEdgeCases tests.") + return + with self.cached_session() as sess, self.test_scope(): + # Illegal strides. + with self.assertRaisesRegexp( + errors_impl.UnimplementedError, + ".*strides.*in the batch and depth dimensions"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[2, 1, 1, 1], + padding="SAME", + activation_mode="Relu")) + with self.assertRaisesRegexp( + errors_impl.UnimplementedError, + ".*strides.*in the batch and depth dimensions"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[1, 1, 1, 2], + padding="SAME", + activation_mode="Relu")) + + # Illegal activation mode. + with self.assertRaisesRegexp(ValueError, + "Op passed string 'Tanh' not in:"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1, 1, 1, 1]), + _IotaNdF32Constant([1]), + strides=[1, 1, 1, 1], + padding="SAME", + activation_mode="Tanh")) + + # Filter larger than input. 
+ with self.assertRaisesRegexp(ValueError, "Negative dimension size"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([32, 20, 20, 3]), + _IotaNdF32Constant([20, 21, 3, 2]), + _IotaNdF32Constant([2]), + strides=[1, 1, 1, 1], + padding="VALID", + activation_mode="Relu")) + with self.assertRaisesRegexp(ValueError, "Negative dimension size"): + sess.run( + fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + _IotaNdF32Constant([32, 20, 20, 3]), + _IotaNdF32Constant([21, 20, 3, 2]), + _IotaNdF32Constant([2]), + strides=[1, 1, 1, 1], + padding="VALID", + activation_mode="Relu")) + + +# Add InceptionFwd tests to FusedConv2DBiasActivationTest. +for index, (input_size_, filter_size_, output_size_, stride_, + padding_) in enumerate(_GetShrunkInceptionShapes()): + setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_" + str(index), + _GetInceptionFwdTest(input_size_, filter_size_, stride_, padding_)) + +# TODO(b/35359731) +# Fwd, BckInput, and BackFilter to test that for certain input parameter +# set, winograd nonfused algorithm will be excluded from conv autotune. If +# in such case, winograd nonfused algorithm is added as one option of the +# conv autotune, and cuDNN version is smaller than 7, the following tests +# will fail. +ishape = [1, 400, 400, 1] +fshape = [1, 1, 1, 256] +oshape = [1, 400, 400, 256] +setattr(FusedConv2DBiasActivationTest, "testInceptionFwd_No_Winograd_Nonfused", + _GetInceptionFwdTest(ishape, fshape, 1, "SAME", gpu_only=True)) + + +def _CalculateConvolvedOutputDim(input_dim, filter_dim, stride, padding_type): + """Calculates the size of an output dimension of a strided convolution. + + Given the sizes of the corresponding dimension of the input and filter shapes, + and the stride and padding_types, calculates the size of the output dimension. + This function can be called separately for each input dimension. + + Args: + input_dim: An `int` specifying the size of the input dimension. 
+ filter_dim: An `int` specifying the size of the filter dimension. + stride: An `int` specifying the step size of the convolution along the + input dimension. + padding_type: either 'VALID' or 'SAME'. + + Returns: + The size of the output dimension. + """ + if padding_type == "VALID": + return (input_dim - filter_dim + stride) // stride + else: # padding_type == 'SAME' + return (input_dim + stride - 1) // stride + + +def _NchwVectCToNchw(in_tensor): + # [N, C / 4, H, W, 4] => [N, C / 4, 4, H, W] == [N, C, H, W] + t = array_ops.transpose(in_tensor, [0, 1, 4, 2, 3]) + n = in_tensor.shape.dims[0].value + c = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value + h = in_tensor.shape.dims[2].value + w = in_tensor.shape.dims[3].value + return array_ops.reshape(t, [n, c, h, w]) + + +def _OihwVectIToHwio(in_tensor): + # [O, I / 4, H, W, 4] => [H, W, I / 4, 4, O] == [H, W, I, O] + t = array_ops.transpose(in_tensor, [2, 3, 1, 4, 0]) + o = in_tensor.shape.dims[0].value + i = in_tensor.shape.dims[1].value * in_tensor.shape.dims[4].value + h = in_tensor.shape.dims[2].value + w = in_tensor.shape.dims[3].value + return array_ops.reshape(t, [h, w, i, o]) + + +def _NchwToNchwVectC(in_tensor): + n, c, h, w = in_tensor.shape.as_list() + assert c % 4 == 0 + t = array_ops.reshape(in_tensor, [n, c // 4, 4, h, w]) + return array_ops.transpose(t, [0, 1, 3, 4, 2]) + + +def _HwioToOihw(in_tensor): + return array_ops.transpose(in_tensor, [3, 2, 0, 1]) + + +def _SimulateFusedConv2dBiasActivationInt8(conv_input_scale, conv_input, kernel, + padding, strides, side_input_scale, + side_input, biases, apply_relu): + """Simulates the int8 fused 2-D convolution op using separate float ops. + + The arguments and return values have the same format, meanings and + restrictions as the actual op. + Args: + conv_input_scale: A scalar 'float'. + conv_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. + kernel: A `Tensor` of type `qint8` in OIHW_VECT_I layout. 
+ padding: A `string` from: `"SAME", "VALID"`. + strides: A list of `ints`. + side_input_scale: A scalar 'float'. + side_input: A `Tensor` of type `qint8` in NCHW_VECT_C layout. + biases: A `Tensor` of type `float32` in NCHW layout. + apply_relu: A boolean to specify whether to apply "Relu" activation function + that clips outputs to the range [0, 127], or "None" activation that clips + to the range [-128, 127]. + Returns: + A `Tensor` of type `qint8` in NCHW_VECT_C layout. + """ + conv_result = nn_ops.conv2d( + _NchwVectCToNchw(gen_array_ops.dequantize(conv_input, -128, 127)), + _OihwVectIToHwio(gen_array_ops.dequantize(kernel, -128, 127)), + strides=strides, + padding=padding, + data_format="NCHW") * conv_input_scale + + conv_and_side_inputs = conv_result + side_input_scale * _NchwVectCToNchw( + gen_array_ops.dequantize(side_input, -128, 127)) + + output = nn_ops.bias_add(conv_and_side_inputs, biases, data_format="NCHW") + if apply_relu: + output = nn_ops.relu(output) + + result, _, _ = gen_array_ops.quantize_v2( + _NchwToNchwVectC(output), -128, 127, dtypes.qint8) + return result + + +# TODO(b/114580749): XLA:CPU/GPU don't support int8 at the moment, so this test +# doesn't currently use XLA. 
+class FusedConvInt8Tests(object): + _test_params = [ + { + "batch_size": 1, + "input_channels": 4, + "output_channels": 4, + "input_height": 8, + "input_width": 8, + "filter_height": 6, + "filter_width": 6, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 1, + "input_channels": 4, + "output_channels": 4, + "input_height": 6, + "input_width": 6, + "filter_height": 6, + "filter_width": 6, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "VALID" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.0, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 2, + "input_channels": 8, + "output_channels": 16, + "input_height": 8, + "input_width": 8, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 2, + "horizontal_stride": 2, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "VALID" + }, + { + "batch_size": 2, + "input_channels": 16, + "output_channels": 16, + "input_height": 9, + "input_width": 9, + "filter_height": 3, + "filter_width": 3, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.001, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + 
"input_height": 9, + "input_width": 9, + "filter_height": 5, + "filter_width": 5, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.001, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + "input_height": 9, + "input_width": 9, + "filter_height": 7, + "filter_width": 1, + "vertical_stride": 2, + "horizontal_stride": 1, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + { + "batch_size": 3, + "input_channels": 8, + "output_channels": 8, + "input_height": 9, + "input_width": 9, + "filter_height": 1, + "filter_width": 7, + "vertical_stride": 1, + "horizontal_stride": 1, + "conv_input_scale": 0.002, + "side_input_scale": 0.5, + "bias_scale": 1, + "padding_type": "SAME" + }, + ] + + @contextlib.contextmanager + def test_scope(self): # pylint: disable=invalid-name + """Can be overridden in base classes to provide a test scope.""" + yield + + def runTest(self, test_param, apply_relu): + batch_size = test_param["batch_size"] + input_channels = test_param["input_channels"] + output_channels = test_param["output_channels"] + input_height = test_param["input_height"] + input_width = test_param["input_width"] + filter_height = test_param["filter_height"] + filter_width = test_param["filter_width"] + vertical_stride = test_param["vertical_stride"] + horizontal_stride = test_param["horizontal_stride"] + conv_input_scale = test_param["conv_input_scale"] + side_input_scale = test_param["side_input_scale"] + bias_scale = test_param["bias_scale"] + padding_type = test_param["padding_type"] + + with self.cached_session(use_gpu=True) as sess, self.test_scope(): + conv_input, _, _ = gen_array_ops.quantize_v2( + random_ops.random_uniform( + [batch_size, input_channels // 4, input_height, input_width, 4], + minval=-0.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, dtypes.qint8) + + kernel, _, _ = 
gen_array_ops.quantize_v2( + random_ops.random_uniform([ + output_channels, input_channels // 4, filter_height, filter_width, + 4 + ], + minval=-1.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, + dtypes.qint8) + + output_height = _CalculateConvolvedOutputDim( + input_height, filter_height, vertical_stride, padding_type) + output_width = _CalculateConvolvedOutputDim( + input_width, filter_width, horizontal_stride, padding_type) + tf_logging.info("output_height=%s, output_width=%s", output_height, + output_width) + + side_input, _, _ = gen_array_ops.quantize_v2( + random_ops.random_uniform([ + batch_size, output_channels // 4, output_height, output_width, 4 + ], + minval=0.0, + maxval=1.0, + dtype=dtypes.float32), -1.0, 1.0, + dtypes.qint8) + + biases = random_ops.random_uniform([output_channels], + minval=-10 * bias_scale, + maxval=20 * bias_scale, + dtype=dtypes.float32) + + strides = [1, 1, vertical_stride, horizontal_stride] + + actual = fused_conv2d_bias_activation_op.fused_conv2d_bias_activation( + conv_input, + kernel, + biases, + strides=strides, + padding=padding_type, + conv_input_scale=conv_input_scale, + side_input_scale=side_input_scale, + side_input=side_input, + activation_mode="Relu" if apply_relu else "None", + data_format="NCHW_VECT_C", + filter_format="OIHW_VECT_I") + + expected = _SimulateFusedConv2dBiasActivationInt8( + conv_input_scale, conv_input, kernel, padding_type, strides, + side_input_scale, side_input, biases, apply_relu) + + actual_y, expected_y = sess.run([actual, expected]) + self.assertAllClose(actual_y, expected_y, rtol=0, atol=1) + + def testFusedConvInt8(self): + if not test.is_gpu_available( + cuda_only=True, min_cuda_compute_capability=(6, 1)): + tf_logging.info("int8 test skipped because not run with --config=cuda or " + "no GPUs with compute capability >= 6.1 are available.") + return + for apply_relu in [True, False]: + for test_param in self._test_params: + self.runTest(test_param, apply_relu) + + +if __name__ == 
"__main__": + test.main() diff --git a/tensorflow/tools/pip_package/pip_smoke_test.py b/tensorflow/tools/pip_package/pip_smoke_test.py index bfc007bc39..c6ef82ccdc 100644 --- a/tensorflow/tools/pip_package/pip_smoke_test.py +++ b/tensorflow/tools/pip_package/pip_smoke_test.py @@ -90,6 +90,7 @@ BLACKLIST = [ "//tensorflow/contrib/lite/python:interpreter.py", "//tensorflow/contrib/lite/python:interpreter_test.py", "//tensorflow/contrib/ffmpeg:test_data", + "//tensorflow/contrib/fused_conv:fused_conv2d_bias_activation_op_test_base", "//tensorflow/contrib/hadoop:test_data", "//tensorflow/contrib/factorization/examples:mnist", "//tensorflow/contrib/factorization/examples:mnist.py", -- GitLab From db3e59a545f06780583ad839da9e19d847dfd392 Mon Sep 17 00:00:00 2001 From: Nupur Garg Date: Thu, 27 Sep 2018 11:11:34 -0700 Subject: [PATCH 086/570] Internal change. PiperOrigin-RevId: 214804105 --- .../testing/model_coverage/model_coverage_lib.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py index f8ab394c60..5ca57d083d 100644 --- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py +++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib.py @@ -183,7 +183,11 @@ def compare_models_random_data(tflite_model, tf_eval_func, tolerance=5): np.testing.assert_almost_equal(tf_result, tflite_result, tolerance) -def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs): +def test_frozen_graph(filename, + input_arrays, + output_arrays, + input_shapes=None, + **kwargs): """Validates the TensorFlow frozen graph converts to a TFLite model. Converts the TensorFlow frozen graph to TFLite and checks the accuracy of the @@ -193,10 +197,14 @@ def test_frozen_graph(filename, input_arrays, output_arrays, **kwargs): filename: Full filepath of file containing frozen GraphDef. 
input_arrays: List of input tensors to freeze graph with. output_arrays: List of output tensors to freeze graph with. + input_shapes: Dict of strings representing input tensor names to list of + integers representing input shapes (e.g., {"foo" : [1, 16, 16, 3]}). + Automatically determined when input shapes is None (e.g., {"foo" : None}). + (default None) **kwargs: Additional arguments to be passed into the converter. """ converter = _lite.TocoConverter.from_frozen_graph(filename, input_arrays, - output_arrays) + output_arrays, input_shapes) tflite_model = _convert(converter, **kwargs) tf_eval_func = evaluate_frozen_graph(filename, input_arrays, output_arrays) -- GitLab From d2a674a959101c35b8cf65c79a603baa16936805 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 11:28:48 -0700 Subject: [PATCH 087/570] Update ops-related pbtxt files. PiperOrigin-RevId: 214807362 --- tensorflow/core/ops/compat/ops_history.v1.pbtxt | 11 +++++++++++ tensorflow/core/ops/ops.pbtxt | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index cac4259356..7625524674 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -74910,6 +74910,17 @@ op { type: "type" } } +op { + name: "UnicodeScript" + input_arg { + name: "input" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_INT32 + } +} op { name: "UniformCandidateSampler" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index e173c2d072..83af07431c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -35648,6 +35648,17 @@ op { type: "type" } } +op { + name: "UnicodeScript" + input_arg { + name: "input" + type: DT_INT32 + } + output_arg { + name: "output" + type: DT_INT32 + } +} op { name: "UniformCandidateSampler" input_arg { -- GitLab From 
9a0a768d4416d157664d864d992a62782beea4a4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 11:54:40 -0700 Subject: [PATCH 088/570] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 214812088 --- tensorflow/go/op/wrappers.go | 650 +++++++++++++++++------------------ 1 file changed, 325 insertions(+), 325 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 065c7e3011..96df1eee30 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -4059,50 +4059,6 @@ func FixedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true return op.Output(0), op.Output(1), op.Output(2) } -// Computes the sum along segments of a tensor. -// -// Read -// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) -// for an explanation of segments. -// -// Computes a tensor such that -// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such -// that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` -// need not be sorted and need not cover all values in the full -// range of valid values. -// -// If the sum is empty for a given segment ID `i`, `output[i] = 0`. -// If the given segment ID `i` is negative, the value is dropped and will not be -// added to the sum of the segment. -// -// `num_segments` should equal the number of distinct segment IDs. -// -//
-// -//
-// -// Arguments: -// -// segment_ids: A tensor whose shape is a prefix of `data.shape`. -// -// -// Returns Has same shape as data, except for the first `segment_ids.rank` -// dimensions, which are replaced with a single dimension which has size -// `num_segments`. -func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "UnsortedSegmentSum", - Input: []tf.Input{ - data, segment_ids, num_segments, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // ResourceStridedSliceAssignAttr is an optional argument to ResourceStridedSliceAssign. type ResourceStridedSliceAssignAttr func(optionalAttr) @@ -10714,6 +10670,129 @@ func ReadVariableOp(scope *Scope, resource tf.Output, dtype tf.DataType) (value return op.Output(0) } +// This op consumes a lock created by `MutexLock`. +// +// This op exists to consume a tensor created by `MutexLock` (other than +// direct control dependencies). It should be the only that consumes the tensor, +// and will raise an error if it is not. Its only purpose is to keep the +// mutex lock tensor alive until it is consumed by this op. +// +// **NOTE**: This operation must run on the same device as its input. This may +// be enforced via the `colocate_with` mechanism. +// +// Arguments: +// mutex_lock: A tensor returned by `MutexLock`. +// +// Returns the created operation. +func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "ConsumeMutexLock", + Input: []tf.Input{ + mutex_lock, + }, + } + return scope.AddOperation(opspec) +} + +// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd. +type ResourceScatterNdAddAttr func(optionalAttr) + +// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value. +// +// value: An optional bool. Defaults to True. 
If True, the assignment will +// be protected by a lock; otherwise the behavior is undefined, +// but may exhibit less contention. +// If not specified, defaults to true +func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Adds sparse `updates` to individual values or slices within a given +// +// variable according to `indices`. +// +// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. +// +// `indices` must be integer tensor, containing indices into `ref`. +// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +// +// The innermost dimension of `indices` (with length `K`) corresponds to +// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th +// dimension of `ref`. +// +// `updates` is `Tensor` of rank `Q-1+P-K` with shape: +// +// ``` +// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. +// ``` +// +// For example, say we want to update 4 scattered elements to a rank-1 tensor to +// 8 elements. In Python, that update would look like this: +// +// ```python +// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) +// indices = tf.constant([[4], [3], [1] ,[7]]) +// updates = tf.constant([9, 10, 11, 12]) +// update = tf.scatter_nd_add(ref, indices, updates) +// with tf.Session() as sess: +// print sess.run(update) +// ``` +// +// The resulting update to ref would look like this: +// +// [1, 12, 3, 14, 14, 6, 7, 20] +// +// See `tf.scatter_nd` for more details about how to make updates to +// slices. +// +// Arguments: +// ref: A resource handle. Must be from a VarHandleOp. +// indices: A Tensor. Must be one of the following types: int32, int64. +// A tensor of indices into ref. +// updates: A Tensor. Must have the same type as ref. A tensor of +// values to add to ref. +// +// Returns the created operation. 
+func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceScatterNdAdd", + Input: []tf.Input{ + ref, indices, updates, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// Mutually reduces multiple tensors of identical type and shape. +func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets} + opspec := tf.OpSpec{ + Type: "CollectiveReduce", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Updates the tree ensemble by either adding a layer to the last tree being grown // // or by starting a new tree. @@ -11455,68 +11534,31 @@ func ResourceScatterDiv(scope *Scope, resource tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } -// ResourceScatterNdAddAttr is an optional argument to ResourceScatterNdAdd. -type ResourceScatterNdAddAttr func(optionalAttr) +// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. +type StatelessRandomNormalAttr func(optionalAttr) -// ResourceScatterNdAddUseLocking sets the optional use_locking attribute to value. +// StatelessRandomNormalDtype sets the optional dtype attribute to value. // -// value: An optional bool. Defaults to True. If True, the assignment will -// be protected by a lock; otherwise the behavior is undefined, -// but may exhibit less contention. 
-// If not specified, defaults to true -func ResourceScatterNdAddUseLocking(value bool) ResourceScatterNdAddAttr { +// value: The type of the output. +// If not specified, defaults to DT_FLOAT +func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { return func(m optionalAttr) { - m["use_locking"] = value + m["dtype"] = value } } -// Adds sparse `updates` to individual values or slices within a given -// -// variable according to `indices`. -// -// `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. -// -// `indices` must be integer tensor, containing indices into `ref`. -// It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. -// -// The innermost dimension of `indices` (with length `K`) corresponds to -// indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th -// dimension of `ref`. -// -// `updates` is `Tensor` of rank `Q-1+P-K` with shape: -// -// ``` -// [d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -// ``` -// -// For example, say we want to update 4 scattered elements to a rank-1 tensor to -// 8 elements. In Python, that update would look like this: -// -// ```python -// ref = tf.Variable([1, 2, 3, 4, 5, 6, 7, 8], use_resource=True) -// indices = tf.constant([[4], [3], [1] ,[7]]) -// updates = tf.constant([9, 10, 11, 12]) -// update = tf.scatter_nd_add(ref, indices, updates) -// with tf.Session() as sess: -// print sess.run(update) -// ``` -// -// The resulting update to ref would look like this: +// Outputs deterministic pseudorandom values from a normal distribution. // -// [1, 12, 3, 14, 14, 6, 7, 20] +// The generated values will have mean 0 and standard deviation 1. // -// See `tf.scatter_nd` for more details about how to make updates to -// slices. +// The outputs are a deterministic function of `shape` and `seed`. // // Arguments: -// ref: A resource handle. Must be from a VarHandleOp. -// indices: A Tensor. Must be one of the following types: int32, int64. 
-// A tensor of indices into ref. -// updates: A Tensor. Must have the same type as ref. A tensor of -// values to add to ref. +// shape: The shape of the output tensor. +// seed: 2 seeds (shape [2]). // -// Returns the created operation. -func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, updates tf.Output, optional ...ResourceScatterNdAddAttr) (o *tf.Operation) { +// Returns Random values with specified shape. +func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { if scope.Err() != nil { return } @@ -11525,25 +11567,9 @@ func ResourceScatterNdAdd(scope *Scope, ref tf.Output, indices tf.Output, update a(attrs) } opspec := tf.OpSpec{ - Type: "ResourceScatterNdAdd", - Input: []tf.Input{ - ref, indices, updates, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// Mutually reduces multiple tensors of identical type and shape. -func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key int64, instance_key int64, merge_op string, final_op string, subdiv_offsets []int64) (data tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"group_size": group_size, "group_key": group_key, "instance_key": instance_key, "merge_op": merge_op, "final_op": final_op, "subdiv_offsets": subdiv_offsets} - opspec := tf.OpSpec{ - Type: "CollectiveReduce", + Type: "StatelessRandomNormal", Input: []tf.Input{ - input, + shape, seed, }, Attrs: attrs, } @@ -11551,31 +11577,83 @@ func CollectiveReduce(scope *Scope, input tf.Output, group_size int64, group_key return op.Output(0) } -// StatelessRandomNormalAttr is an optional argument to StatelessRandomNormal. -type StatelessRandomNormalAttr func(optionalAttr) - -// StatelessRandomNormalDtype sets the optional dtype attribute to value. +// Creates a sequence of numbers. // -// value: The type of the output. 
-// If not specified, defaults to DT_FLOAT -func StatelessRandomNormalDtype(value tf.DataType) StatelessRandomNormalAttr { +// This operation creates a sequence of numbers that begins at `start` and +// extends by increments of `delta` up to but not including `limit`. +// +// For example: +// +// ``` +// # 'start' is 3 +// # 'limit' is 18 +// # 'delta' is 3 +// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] +// ``` +// +// Arguments: +// start: 0-D (scalar). First entry in the sequence. +// limit: 0-D (scalar). Upper limit of sequence, exclusive. +// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. +// +// Returns 1-D. +func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "Range", + Input: []tf.Input{ + start, limit, delta, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. +type ResourceApplyMomentumAttr func(optionalAttr) + +// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { return func(m optionalAttr) { - m["dtype"] = value + m["use_locking"] = value } } -// Outputs deterministic pseudorandom values from a normal distribution. +// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. // -// The generated values will have mean 0 and standard deviation 1. +// value: If `True`, the tensor passed to compute grad will be +// var - lr * momentum * accum, so in the end, the var you get is actually +// var - lr * momentum * accum. 
+// If not specified, defaults to false +func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { + return func(m optionalAttr) { + m["use_nesterov"] = value + } +} + +// Update '*var' according to the momentum scheme. Set use_nesterov = True if you // -// The outputs are a deterministic function of `shape` and `seed`. +// want to use Nesterov momentum. +// +// accum = accum * momentum + grad +// var -= lr * accum // // Arguments: -// shape: The shape of the output tensor. -// seed: 2 seeds (shape [2]). +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. +// momentum: Momentum. Must be a scalar. // -// Returns Random values with specified shape. -func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, optional ...StatelessRandomNormalAttr) (output tf.Output) { +// Returns the created operation. +func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { if scope.Err() != nil { return } @@ -11584,14 +11662,13 @@ func StatelessRandomNormal(scope *Scope, shape tf.Output, seed tf.Output, option a(attrs) } opspec := tf.OpSpec{ - Type: "StatelessRandomNormal", + Type: "ResourceApplyMomentum", Input: []tf.Input{ - shape, seed, + var_, accum, lr, grad, momentum, }, Attrs: attrs, } - op := scope.AddOperation(opspec) - return op.Output(0) + return scope.AddOperation(opspec) } // DepthwiseConv2dNativeBackpropFilterAttr is an optional argument to DepthwiseConv2dNativeBackpropFilter. @@ -15062,6 +15139,78 @@ func StringToHashBucketFast(scope *Scope, input tf.Output, num_buckets int64) (o return op.Output(0) } +// Returns the last element of the input list as well as a list with all but that element. +// +// Fails if the list is empty. 
+// +// input_handle: the input list +// tensor: the withdrawn last element of the list +// element_dtype: the type of elements in the list +// element_shape: the shape of the output tensor +func TensorListPopBack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"element_dtype": element_dtype} + opspec := tf.OpSpec{ + Type: "TensorListPopBack", + Input: []tf.Input{ + input_handle, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) +} + +// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. +type MaxPoolGradGradAttr func(optionalAttr) + +// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. +// +// value: Specify the data format of the input and output data. With the +// default format "NHWC", the data is stored in the order of: +// [batch, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCHW", the data storage order of: +// [batch, in_channels, in_height, in_width]. +// If not specified, defaults to "NHWC" +func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Computes second-order gradients of the maxpooling function. +// +// Arguments: +// orig_input: The original input tensor. +// orig_output: The original output tensor. +// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. +// ksize: The size of the window for each dimension of the input tensor. +// strides: The stride of the sliding window for each dimension of the +// input tensor. +// padding: The type of padding algorithm to use. +// +// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
+func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPoolGradGrad", + Input: []tf.Input{ + orig_input, orig_output, grad, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // TensorArrayGatherV3Attr is an optional argument to TensorArrayGatherV3. type TensorArrayGatherV3Attr func(optionalAttr) @@ -15108,33 +15257,6 @@ func TensorArrayGatherV3(scope *Scope, handle tf.Output, indices tf.Output, flow return op.Output(0) } -// This op consumes a lock created by `MutexLock`. -// -// This op exists to consume a tensor created by `MutexLock` (other than -// direct control dependencies). It should be the only that consumes the tensor, -// and will raise an error if it is not. Its only purpose is to keep the -// mutex lock tensor alive until it is consumed by this op. -// -// **NOTE**: This operation must run on the same device as its input. This may -// be enforced via the `colocate_with` mechanism. -// -// Arguments: -// mutex_lock: A tensor returned by `MutexLock`. -// -// Returns the created operation. -func ConsumeMutexLock(scope *Scope, mutex_lock tf.Output) (o *tf.Operation) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "ConsumeMutexLock", - Input: []tf.Input{ - mutex_lock, - }, - } - return scope.AddOperation(opspec) -} - // Returns x / y element-wise for integer types. 
// // Truncation designates that negative numbers will round fractional quantities @@ -18032,138 +18154,6 @@ func FractionalAvgPoolGrad(scope *Scope, orig_input_tensor_shape tf.Output, out_ return op.Output(0) } -// ResourceApplyMomentumAttr is an optional argument to ResourceApplyMomentum. -type ResourceApplyMomentumAttr func(optionalAttr) - -// ResourceApplyMomentumUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyMomentumUseLocking(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// ResourceApplyMomentumUseNesterov sets the optional use_nesterov attribute to value. -// -// value: If `True`, the tensor passed to compute grad will be -// var - lr * momentum * accum, so in the end, the var you get is actually -// var - lr * momentum * accum. -// If not specified, defaults to false -func ResourceApplyMomentumUseNesterov(value bool) ResourceApplyMomentumAttr { - return func(m optionalAttr) { - m["use_nesterov"] = value - } -} - -// Update '*var' according to the momentum scheme. Set use_nesterov = True if you -// -// want to use Nesterov momentum. -// -// accum = accum * momentum + grad -// var -= lr * accum -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// momentum: Momentum. Must be a scalar. -// -// Returns the created operation. 
-func ResourceApplyMomentum(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, momentum tf.Output, optional ...ResourceApplyMomentumAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyMomentum", - Input: []tf.Input{ - var_, accum, lr, grad, momentum, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - -// MaxPoolGradGradAttr is an optional argument to MaxPoolGradGrad. -type MaxPoolGradGradAttr func(optionalAttr) - -// MaxPoolGradGradDataFormat sets the optional data_format attribute to value. -// -// value: Specify the data format of the input and output data. With the -// default format "NHWC", the data is stored in the order of: -// [batch, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCHW", the data storage order of: -// [batch, in_channels, in_height, in_width]. -// If not specified, defaults to "NHWC" -func MaxPoolGradGradDataFormat(value string) MaxPoolGradGradAttr { - return func(m optionalAttr) { - m["data_format"] = value - } -} - -// Computes second-order gradients of the maxpooling function. -// -// Arguments: -// orig_input: The original input tensor. -// orig_output: The original output tensor. -// grad: 4-D. Gradients of gradients w.r.t. the input of `max_pool`. -// ksize: The size of the window for each dimension of the input tensor. -// strides: The stride of the sliding window for each dimension of the -// input tensor. -// padding: The type of padding algorithm to use. -// -// Returns Gradients of gradients w.r.t. the input to `max_pool`. 
-func MaxPoolGradGrad(scope *Scope, orig_input tf.Output, orig_output tf.Output, grad tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPoolGradGradAttr) (output tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MaxPoolGradGrad", - Input: []tf.Input{ - orig_input, orig_output, grad, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Returns the last element of the input list as well as a list with all but that element. -// -// Fails if the list is empty. -// -// input_handle: the input list -// tensor: the withdrawn last element of the list -// element_dtype: the type of elements in the list -// element_shape: the shape of the output tensor -func TensorListPopBack(scope *Scope, input_handle tf.Output, element_dtype tf.DataType) (output_handle tf.Output, tensor tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"element_dtype": element_dtype} - opspec := tf.OpSpec{ - Type: "TensorListPopBack", - Input: []tf.Input{ - input_handle, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) -} - // Returns element-wise integer closest to x. // // If the result is midway between two representable values, @@ -21645,6 +21635,50 @@ func LoopCond(scope *Scope, input tf.Output) (output tf.Output) { return op.Output(0) } +// Computes the sum along segments of a tensor. +// +// Read +// [the section on segmentation](https://tensorflow.org/api_guides/python/math_ops#Segmentation) +// for an explanation of segments. +// +// Computes a tensor such that +// \\(output[i] = \sum_{j...} data[j...]\\) where the sum is over tuples `j...` such +// that `segment_ids[j...] == i`. 
Unlike `SegmentSum`, `segment_ids` +// need not be sorted and need not cover all values in the full +// range of valid values. +// +// If the sum is empty for a given segment ID `i`, `output[i] = 0`. +// If the given segment ID `i` is negative, the value is dropped and will not be +// added to the sum of the segment. +// +// `num_segments` should equal the number of distinct segment IDs. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A tensor whose shape is a prefix of `data.shape`. +// +// +// Returns Has same shape as data, except for the first `segment_ids.rank` +// dimensions, which are replaced with a single dimension which has size +// `num_segments`. +func UnsortedSegmentSum(scope *Scope, data tf.Output, segment_ids tf.Output, num_segments tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "UnsortedSegmentSum", + Input: []tf.Input{ + data, segment_ids, num_segments, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Computes the product along segments of a tensor. // // Read @@ -22272,40 +22306,6 @@ func Any(scope *Scope, input tf.Output, axis tf.Output, optional ...AnyAttr) (ou return op.Output(0) } -// Creates a sequence of numbers. -// -// This operation creates a sequence of numbers that begins at `start` and -// extends by increments of `delta` up to but not including `limit`. -// -// For example: -// -// ``` -// # 'start' is 3 -// # 'limit' is 18 -// # 'delta' is 3 -// tf.range(start, limit, delta) ==> [3, 6, 9, 12, 15] -// ``` -// -// Arguments: -// start: 0-D (scalar). First entry in the sequence. -// limit: 0-D (scalar). Upper limit of sequence, exclusive. -// delta: 0-D (scalar). Optional. Default is 1. Number that increments `start`. -// -// Returns 1-D. -func Range(scope *Scope, start tf.Output, limit tf.Output, delta tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "Range", - Input: []tf.Input{ - start, limit, delta, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // DestroyResourceOpAttr is an optional argument to DestroyResourceOp. type DestroyResourceOpAttr func(optionalAttr) -- GitLab From 561a3c4331ebfaac3e61c524911bf6fe85f4ebc9 Mon Sep 17 00:00:00 2001 From: Ayush Dubey Date: Thu, 27 Sep 2018 12:20:33 -0700 Subject: [PATCH 089/570] Dynamic subdivisions in collective ring reduce. 
Before this change, a CollectiveOp user was required to specify subdiv_offsets for the RingReduce algorithm. During ring reduction, we created chunks of the tensor to exchange between devices. If the chunks were too large, or if the hardware supported multiple data exchanges in parallel, the user could further subdivide the chunk by specifying more than 1 subdiv offset. Each subdiv offset corresponded to another subdivision of the chunk, so effectively the total number of tensor chunks is number of devices * number of subdivs. After this change, we can dynamically infer the number of subdivisions based on a target chunk size. In ring_reducer.cc, we start with 1 subdiv, and keep increasing until chunk size is less than MAX_CHUNK_SIZE. Currently, MAX_CHUNK_SIZE is set at 4 MB, although it may make sense to change this based on specific hardware. As a part of this change, a user can now provide an empty subdiv_offset list. If empty, we dynamically add subdivisions based on the above algorithm. If non-empty, we take the user-specified subdivisions. PiperOrigin-RevId: 214815959 --- .../core/common_runtime/ring_reducer.cc | 75 +++++++++++++++-- .../core/common_runtime/ring_reducer_test.cc | 83 +++++++++++++++---- tensorflow/core/kernels/collective_ops.cc | 21 +++-- 3 files changed, 147 insertions(+), 32 deletions(-) diff --git a/tensorflow/core/common_runtime/ring_reducer.cc b/tensorflow/core/common_runtime/ring_reducer.cc index a81f8650bf..b1fe928ba7 100644 --- a/tensorflow/core/common_runtime/ring_reducer.cc +++ b/tensorflow/core/common_runtime/ring_reducer.cc @@ -41,6 +41,16 @@ limitations under the License. // Set true for greater intelligibility of debug mode log messages. #define READABLE_KEYS false +// RingReduce algorithm exchanges chunks of tensor between devices. The chunk +// size depends on the number of subdivisions specified in the algorithm.
If +// the user does not specify the number of subdivisions, we infer the number +// dynamically so that the resulting chunk size does not exceed +// kMaxChunkSizeBytes, empirically set at 4 MiB. +constexpr size_t kMaxChunkSizeBytes = (4 * 1024 * 1024); +// kMaxSubdivsPerDevice is used to give an upper bound on the number of +// subdivisions dynamically generated. A reasonable value would be a small +// multiple of the number of NICs adjacent to each device. +constexpr int kMaxSubdivsPerDevice = 2; namespace tensorflow { namespace { @@ -92,7 +102,62 @@ RingReducer::RingReducer() RingReducer::~RingReducer() { group_size_tensor_ready_.WaitForNotification(); } +Status GenerateSubdivsInCollectiveParams(CollectiveParams* col_params) { + if (col_params->instance.shape.num_elements() == 0) { + return errors::Internal("shape in CollectiveParams should be non-empty"); + } + const int kAvgDevPerTask = + col_params->group.group_size / col_params->group.num_tasks; + const int kMaxNumSubdivs = kMaxSubdivsPerDevice * kAvgDevPerTask; + if (kMaxNumSubdivs <= 0) { + return errors::Internal("Unexpected kMaxNumSubdivs ", kMaxNumSubdivs, + " in RingReducer"); + } + // NOTE(ayushd): If no subdiv_offsets have been specified, dynamically add + // as many offsets as needed so that the size of tensor chunks <= + // kMaxChunkSizeBytes. Empirically, chunks that are too small or too large + // lead to worse performance.
+ int num_subdivs = 0; + const size_t tensor_size = col_params->instance.shape.num_elements() * + DataTypeSize(col_params->instance.data_type); + size_t chunk_size; + do { + ++num_subdivs; + int num_chunks = col_params->group.group_size * num_subdivs; + chunk_size = tensor_size / num_chunks; + VLOG(2) << "num_subdivs " << num_subdivs << " num_chunks " << num_chunks + << " chunk_size " << chunk_size; + } while (chunk_size > kMaxChunkSizeBytes && num_subdivs < kMaxNumSubdivs); + if (num_subdivs <= 0) { + return errors::Internal("Unexpected num_subdivs ", num_subdivs, + " in RingReducer"); + } + + int subdiv_stride = kAvgDevPerTask / num_subdivs; + if (subdiv_stride == 0) subdiv_stride = 1; + col_params->instance.impl_details.subdiv_offsets.reserve(num_subdivs); + for (int sdi = 0; sdi < num_subdivs; ++sdi) { + int subdiv_offset = subdiv_stride * sdi; + if (sdi % 2 == 1) subdiv_offset *= -1; + col_params->instance.impl_details.subdiv_offsets.push_back(subdiv_offset); + } + + if (VLOG_IS_ON(2)) { + string subdiv_buf; + for (const int subdiv_offset : + col_params->instance.impl_details.subdiv_offsets) { + strings::StrAppend(&subdiv_buf, " ", subdiv_offset); + } + VLOG(2) << "Dynamically generated " << num_subdivs + << " subdiv_offsets:" << subdiv_buf << " tensor_size " + << tensor_size << " chunk_size " << chunk_size; + } + + return Status::OK(); +} + Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) { + // TODO(b/113171733): change CHECKs to return errors. CHECK_EQ(col_params->instance.type, REDUCTION_COLLECTIVE); CHECK_EQ(col_params->instance.impl_details.collective_name, "RingReduce"); const string& device_name = @@ -123,12 +188,11 @@ Status RingReducer::InitializeCollectiveParams(CollectiveParams* col_params) { dev_per_task.push_back(dev_count); CHECK_EQ(col_params->group.num_tasks, dev_per_task.size()); - // Generate a ring permutation for each requested offset. 
if (col_params->instance.impl_details.subdiv_offsets.empty()) { - return errors::Internal( - "Subdiv offsets should be non-empty for ring reducer, size=", - col_params->instance.impl_details.subdiv_offsets.size()); + TF_RETURN_IF_ERROR(GenerateSubdivsInCollectiveParams(col_params)); } + + // Generate a ring permutation for requested offset. VLOG(2) << "Setting up perms for col_params " << col_params << " subdiv_permutations " << &col_params->instance.impl_details.subdiv_permutations; @@ -646,7 +710,8 @@ bool RingReducer::RunAsyncParts() { case RF_SEND: --send_pending_count; break; - default: {} // Ignore any other actions + default: { + } // Ignore any other actions } } } diff --git a/tensorflow/core/common_runtime/ring_reducer_test.cc b/tensorflow/core/common_runtime/ring_reducer_test.cc index 28df85399e..75aba43572 100644 --- a/tensorflow/core/common_runtime/ring_reducer_test.cc +++ b/tensorflow/core/common_runtime/ring_reducer_test.cc @@ -549,37 +549,38 @@ class RingReducerTest : public ::testing::Test { int32 reduce_counter_ GUARDED_BY(mu_) = 0; }; -TEST_F(RingReducerTest, InitializeParams) { - static const int kNumDevsPerTask = 8; - static const int kNumTasks = 3; - static const int kNumDevs = kNumDevsPerTask * kNumTasks; +CollectiveParams SetUpCollectiveParams(const int num_devs_per_task, + const int num_tasks) { CollectiveParams cp; - std::vector device_names; - std::vector task_names; + const int kNumDevs = num_devs_per_task * num_tasks; cp.group.group_key = 1; cp.group.group_size = kNumDevs; cp.group.device_type = DeviceType("GPU"); - cp.group.num_tasks = kNumTasks; + cp.group.num_tasks = num_tasks; cp.instance.instance_key = 3; cp.instance.type = REDUCTION_COLLECTIVE; cp.instance.data_type = DataType(DT_FLOAT); - cp.instance.shape = TensorShape({5}); + cp.instance.shape = TensorShape({kNumDevs}); cp.instance.impl_details.collective_name = "RingReduce"; cp.instance.impl_details.subdiv_offsets.push_back(0); cp.is_source = false; for (int i = 0; i < 
kNumDevs; ++i) { - int task_id = i / kNumDevsPerTask; - int dev_id = i % kNumDevsPerTask; + int task_id = i / num_devs_per_task; + int dev_id = i % num_devs_per_task; string task_name = strings::StrCat("/job:worker/replica:0/task:", task_id); - task_names.push_back(task_name); string device_name = strings::StrCat(task_name, "/device:GPU:", dev_id); - device_names.push_back(device_name); cp.instance.task_names.push_back(task_name); cp.instance.device_names.push_back(device_name); } + return cp; +} - int test_rank = 0; - cp.default_rank = test_rank; +TEST_F(RingReducerTest, InitializeParams) { + const int kNumDevsPerTask = 8; + const int kNumTasks = 3; + CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks); + + cp.default_rank = 0; cp.instance.impl_details.subdiv_offsets = {0, 4}; RunSubdivPermsTest(&cp, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, @@ -588,8 +589,15 @@ TEST_F(RingReducerTest, InitializeParams) { 8, 9, 10, 11, 20, 21, 22, 23, 16, 17, 18, 19}}, {0, 4}); - test_rank = 3; - cp.default_rank = test_rank; + cp.instance.impl_details.subdiv_offsets = {0, -4}; + RunSubdivPermsTest(&cp, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, + 15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20}}, + {0, 3}); + + cp.default_rank = 3; cp.instance.impl_details.subdiv_offsets = {3, -3}; RunSubdivPermsTest(&cp, {{3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, @@ -599,6 +607,49 @@ TEST_F(RingReducerTest, InitializeParams) { {0, 1}); } +TEST_F(RingReducerTest, AutomaticSubdivs) { + const int kNumDevsPerTask = 8; + const int kNumTasks = 3; + const int kNumDevs = kNumDevsPerTask * kNumTasks; + CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks); + + // Test automatic generation of subdiv offsets. 
+ cp.default_rank = 0; + cp.instance.impl_details.subdiv_offsets.clear(); + RunSubdivPermsTest(&cp, {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}}, + {0}); + + // Set shape so that with 2 subdivs chunk_size is 3 MiB. This should cause 2 + // offsets, {0, -4}, to be generated. + { + int num_subdivs = 2; + int num_chunks = kNumDevs * num_subdivs; + size_t chunk_size = 3 * 1048576; // 3 MB + size_t tensor_size = chunk_size * num_chunks; + cp.instance.shape = + TensorShape({static_cast(tensor_size / DataTypeSize(DT_FLOAT))}); + } + cp.instance.impl_details.subdiv_offsets.clear(); + RunSubdivPermsTest(&cp, + {{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, + 15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20}}, + {0, 3}); +} + +TEST_F(RingReducerTest, AutomaticSubdivUpperBound) { + const int kNumDevsPerTask = 1; + const int kNumTasks = 4; + CollectiveParams cp = SetUpCollectiveParams(kNumDevsPerTask, kNumTasks); + + cp.default_rank = 0; + cp.instance.impl_details.subdiv_offsets.clear(); + cp.instance.shape = TensorShape({104857600 / DataTypeSize(DT_FLOAT)}); + RunSubdivPermsTest(&cp, {{0, 1, 2, 3}, {0, 1, 2, 3}}, {0, 0}); +} + // TODO(b/113171733): change to use TEST_P. #define DEF_TEST(B, T, W, D, S, L, A) \ TEST_F(RingReducerTest, \ diff --git a/tensorflow/core/kernels/collective_ops.cc b/tensorflow/core/kernels/collective_ops.cc index e0da91125b..fa959b5a0e 100644 --- a/tensorflow/core/kernels/collective_ops.cc +++ b/tensorflow/core/kernels/collective_ops.cc @@ -132,6 +132,7 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel { "Failed to get CollectiveExecutor from OpKernelContext for Op ", col_params_.name), done); + col_params_.instance.shape = c->input(0).shape(); // Allocate output on the first pass through this function. This must be // done immediately, while we're still in the executor thread. 
Otherwise // the memory is not guaranteed to be unused by any concurrently executing @@ -171,7 +172,7 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel { OP_REQUIRES_OK( c, c->GetAttr("instance_key", &col_params_.instance.instance_key)); OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type)); - OP_REQUIRES_OK(c, c->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape)); col_params_.is_source = true; col_params_.instance.impl_details.subdiv_offsets = {0}; @@ -195,13 +196,14 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel { if (c->mutable_output(0) == nullptr) { // Allocate the output tensor, trying to reuse the input. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC( - c, c->forward_input_or_allocate_output({0}, 0, shape_, &output), - done); + OP_REQUIRES_OK_ASYNC(c, + c->forward_input_or_allocate_output( + {0}, 0, col_params_.instance.shape, &output), + done); } if (!CanProceedWithCompute(c, col_exec, done)) return; OP_REQUIRES_ASYNC( - c, shape_.IsSameSize(c->input(0).shape()), + c, col_params_.instance.shape.IsSameSize(c->input(0).shape()), errors::Internal("Declared shape of op ", col_params_.name, " does not match shape of input"), done); @@ -214,8 +216,6 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel { } private: - TensorShape shape_; - TF_DISALLOW_COPY_AND_ASSIGN(CollectiveBcastSendOpKernel); }; @@ -234,7 +234,7 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel { OP_REQUIRES_OK( c, c->GetAttr("instance_key", &col_params_.instance.instance_key)); OP_REQUIRES_OK(c, c->GetAttr("T", &col_params_.instance.data_type)); - OP_REQUIRES_OK(c, c->GetAttr("shape", &shape_)); + OP_REQUIRES_OK(c, c->GetAttr("shape", &col_params_.instance.shape)); col_params_.is_source = false; col_params_.instance.impl_details.subdiv_offsets = {0}; @@ -258,7 +258,8 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel { if (c->mutable_output(0) == nullptr) { 
// No input, so must allocate output. Tensor* output = nullptr; - OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done); + OP_REQUIRES_OK_ASYNC( + c, c->allocate_output(0, col_params_.instance.shape, &output), done); } if (!CanProceedWithCompute(c, col_exec, done)) return; @@ -270,8 +271,6 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel { } private: - TensorShape shape_; - TF_DISALLOW_COPY_AND_ASSIGN(CollectiveBcastRecvOpKernel); }; -- GitLab From 750466c6e6624d279de7f9a43accd682d487509c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 12:37:05 -0700 Subject: [PATCH 090/570] Introduce the abstraction of RunHandler which each DirectSession can use for the duration of a single RunInternal() call from RunHandlerPool. We want to leverage this abstraction for improving the cross-session inter-op parallelism for lower latency inference in the future. In the case that global pools aren't used, this change should be a no-op. PiperOrigin-RevId: 214818187 --- tensorflow/core/BUILD | 16 ++ .../core/common_runtime/direct_session.cc | 49 +++- .../core/common_runtime/direct_session.h | 3 + .../common_runtime/direct_session_test.cc | 28 ++ tensorflow/core/framework/run_handler.cc | 248 ++++++++++++++++++ tensorflow/core/framework/run_handler.h | 95 +++++++ tensorflow/core/framework/run_handler_util.cc | 57 ++++ tensorflow/core/framework/run_handler_util.h | 43 +++ .../core/framework/run_handler_util_test.cc | 93 +++++++ tensorflow/core/protobuf/config.proto | 5 + ...ensorflow.-run-options.-experimental.pbtxt | 6 + .../golden/v1/tensorflow.-run-options.pbtxt | 6 + ...ensorflow.-run-options.-experimental.pbtxt | 6 + .../golden/v2/tensorflow.-run-options.pbtxt | 6 + 14 files changed, 655 insertions(+), 6 deletions(-) create mode 100644 tensorflow/core/framework/run_handler.cc create mode 100644 tensorflow/core/framework/run_handler.h create mode 100644 tensorflow/core/framework/run_handler_util.cc create mode 100644 
tensorflow/core/framework/run_handler_util.h create mode 100644 tensorflow/core/framework/run_handler_util_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ca247dc56b..01e2e9f62b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2484,6 +2484,8 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "framework/op_segment.h", "framework/rendezvous.h", # only needed for tests "framework/resource_var.h", + "framework/run_handler.h", + "framework/run_handler_util.h", "framework/tensor_reference.h", "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", @@ -2970,6 +2972,7 @@ tf_cuda_library( ":core_cpu_internal", ":device_tracer", ":framework", + ":framework_internal", ":graph", ":lib", ":lib_internal", @@ -4117,6 +4120,19 @@ tf_cc_test( ], ) +tf_cc_test( + name = "framework_run_handler_util_test", + size = "small", + srcs = ["framework/run_handler_util_test.cc"], + linkstatic = tf_kernel_tests_linkstatic(), + deps = [ + ":framework_internal", + ":lib", + ":test", + ":test_main", + ], +) + tf_cuda_cc_test( name = "common_runtime_direct_session_test", size = "small", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 841181f8c3..458e133b68 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/run_handler.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/algorithm.h" @@ -244,6 +245,21 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool, #endif // __ANDROID__ } +static RunHandlerPool* GetOrCreateRunHandlerPool( + const SessionOptions& options) { + static RunHandlerPool* pool = + new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options)); + return pool; +} + +bool DirectSession::ShouldUseRunHandlerPool() const { + if (options_.config.session_inter_op_thread_pool_size() > 0 || + options_.config.use_per_session_threads()) { + return false; + } + return true; +} + DirectSession::DirectSession(const SessionOptions& options, const DeviceMgr* device_mgr, DirectSessionFactory* const factory) @@ -582,16 +598,37 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, } } - Executor::Args::Runner default_runner = [this, - pool](Executor::Args::Closure c) { - SchedClosure(pool, std::move(c)); - }; + std::unique_ptr handler; + if (ShouldUseRunHandlerPool() && + run_options.experimental().use_run_handler_pool()) { + // Non-null only when a global inter-op pool is used. 
+ VLOG(1) << "Using RunHandler to scheduler inter-op closures."; + handler = GetOrCreateRunHandlerPool(options_)->Get(); + } + auto* handler_ptr = handler.get(); + + Executor::Args::Runner default_runner = nullptr; + + if (pool == nullptr) { + default_runner = [](Executor::Args::Closure c) { c(); }; + } else if (handler_ptr != nullptr) { + default_runner = [handler_ptr](Executor::Args::Closure c) { + handler_ptr->ScheduleInterOpClosure(std::move(c)); + }; + } else { + default_runner = [this, pool](Executor::Args::Closure c) { + SchedClosure(pool, std::move(c)); + }; + } + for (const auto& item : executors_and_keys->items) { - // TODO(zhengxq): support partial run. - // TODO(zhengxq): if the device picks its own threadpool, we need to assign + // TODO(azaks): support partial run. + // TODO(azaks): if the device picks its own threadpool, we need to assign // less threads to the main compute pool by default. thread::ThreadPool* device_thread_pool = item.device->tensorflow_device_thread_pool(); + // TODO(crk): Investigate usage of RunHandlerPool when using device specific + // thread pool(s). if (!device_thread_pool) { args.runner = default_runner; } else { diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 4a6a921ea7..3a168bbe3f 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -247,6 +247,9 @@ class DirectSession : public Session { ExecutorsAndKeys* executors_and_keys, RunMetadata* run_metadata); + // Returns whether inter-op execution uses a global pool. 
+ bool ShouldUseRunHandlerPool() const; + ::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_); diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index 65e816c202..e3e431f800 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -625,6 +625,34 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) { EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2); } +TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) { + Initialize({3, 2, -1, 0}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + std::vector> inputs; + + // Request two targets: one fetch output and one non-fetched output. + std::vector output_names = {y_ + ":0"}; + std::vector target_nodes = {y_neg_}; + std::vector outputs; + + // Prepares RunOptions and RunMetadata + RunOptions run_options; + run_options.mutable_experimental()->set_use_run_handler_pool(true); + + Status s = session->Run(run_options, inputs, output_names, target_nodes, + &outputs, nullptr); + TF_ASSERT_OK(s); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. + auto mat = outputs[0].matrix(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); +} + TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) { GraphDef def; Graph g(OpRegistry::Global()); diff --git a/tensorflow/core/framework/run_handler.cc b/tensorflow/core/framework/run_handler.cc new file mode 100644 index 0000000000..9c6490a603 --- /dev/null +++ b/tensorflow/core/framework/run_handler.cc @@ -0,0 +1,248 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "tensorflow/core/framework/run_handler.h" + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/run_handler_util.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/util/ptr_util.h" + +namespace tensorflow { + +// Contains the concrete implementation of the RunHandler. +// Externally visible RunHandler class simply forwards the work to this one. +class RunHandler::Impl { + public: + explicit Impl(RunHandlerPool::Impl* pool_impl) : pool_impl_(pool_impl) { + Reset(); + } + + ~Impl() {} + + void set_inter_op_scheduling_range(std::uint_fast32_t start, + std::uint_fast32_t limit) { + inter_op_scheduling_range_.store(EncodePartition(start, limit), + std::memory_order_release); + } + + std::uint_fast32_t inter_op_scheduling_range() const { + return inter_op_scheduling_range_.load(std::memory_order_acquire); + } + + // Stores now time (in microseconds) since unix epoch when the handler is + // requested via RunHandlerPool::Get(). + uint64 start_time_us() const { return start_time_us_; } + + void ScheduleInterOpClosure(std::function fn); + + void Reset(); + + RunHandlerPool::Impl* pool_impl() { return pool_impl_; } + + private: + // Encoding/decoding logic for storing [start, limit) into a single + // uint_fast32_t int. We assume that pool_num_threads < (1 << 16). 
+ const int kMaxPartitionBits = 16; + const int kMaxThreads = 1 << kMaxPartitionBits; + + std::uint_fast32_t EncodePartition(std::uint_fast32_t start, + std::uint_fast32_t limit) { + return (start << kMaxPartitionBits) | limit; + } + + void DecodePartition(std::uint_fast32_t val, std::uint_fast32_t* start, + std::uint_fast32_t* limit) { + *limit = val & (kMaxThreads - 1); + val >>= kMaxPartitionBits; + *start = val; + } + + std::atomic_uint_fast32_t inter_op_scheduling_range_; + RunHandlerPool::Impl* pool_impl_; // NOT OWNED. + uint64 start_time_us_; +}; + +// Contains shared state across all run handlers present in the pool. Also +// responsible for pool management decisions. +// This class is thread safe. +class RunHandlerPool::Impl { + public: + // Maximum number of handlers pre-created during pool construction time. The + // number has been chosen expecting each handler might at least want 1 + // inter-op thread for execution (during compute intensive workloads like + // inference). + static const int kMaxHandlers = 128; + + explicit Impl(int num_inter_op_threads) + : inter_op_thread_pool_(new thread::ThreadPool( + Env::Default(), ThreadOptions(), "inter_op", num_inter_op_threads)), + iterations_(0) { + VLOG(1) << "Creating a RunHandlerPool with max handlers: " << kMaxHandlers; + for (int i = 0; i < kMaxHandlers; ++i) { + handlers_.emplace_back(new RunHandler::Impl(this)); + free_handlers_.push_back(handlers_.back().get()); + } + } + + ~Impl() { + // Sanity check that all handlers have been returned back to the pool before + // destruction. 
+ DCHECK_EQ(handlers_.size(), kMaxHandlers); + DCHECK_EQ(free_handlers_.size(), handlers_.size()); + DCHECK_EQ(sorted_active_handlers_.size(), 0); + } + + thread::ThreadPool* inter_op_thread_pool() const { + return inter_op_thread_pool_.get(); + } + + std::unique_ptr Get() LOCKS_EXCLUDED(mu_) { + mutex_lock l(mu_); + while (free_handlers_.empty()) { + one_handler_free_.wait(l); + } + // Remove the last entry from free_handlers_ and add to the end of + // sorted_active_handlers_. + auto* handler_impl = free_handlers_.back(); + handler_impl->Reset(); + // Sortedness isn't violated if we simply add at the end of the list, since + // handlers are expected to be obtained in increasing order of time. + sorted_active_handlers_.push_back(handler_impl); + DCHECK_LE(sorted_active_handlers_.size(), kMaxHandlers); + free_handlers_.pop_back(); + + RecomputePoolStatsLocked(); + return WrapUnique(new RunHandler(handler_impl)); + } + + void ReleaseHandler(RunHandler::Impl* handler) LOCKS_EXCLUDED(mu_) { + { + mutex_lock l(mu_); + DCHECK_GT(sorted_active_handlers_.size(), 0); + + uint64 now = tensorflow::Env::Default()->NowMicros(); + double elapsed = (now - handler->start_time_us()) / 1000.0; + time_hist_.Add(elapsed); + + // Erase from and update sorted_active_handlers_. Add it to the end of + // free_handlers_. + auto iter = std::find(sorted_active_handlers_.begin(), + sorted_active_handlers_.end(), handler); + DCHECK(iter != sorted_active_handlers_.end()) + << "Unexpected handler: " << handler + << " is being requested for release"; + + // Remove this handler from this list and add it to the list of free + // handlers. + sorted_active_handlers_.erase(iter); + free_handlers_.push_back(handler); + DCHECK_LE(free_handlers_.size(), kMaxHandlers); + + RecomputePoolStatsLocked(); + } + one_handler_free_.notify_one(); + } + + private: + void RecomputePoolStatsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_); + + // Thread safe part. 
+ const std::unique_ptr inter_op_thread_pool_; + + // Thread compatible part used only by lock under RunHandlerPool. + // Handlers are sorted by start time. + std::vector sorted_active_handlers_ GUARDED_BY(mu_); + std::vector free_handlers_ GUARDED_BY(mu_); + std::vector> handlers_ GUARDED_BY(mu_); + // Histogram of elapsed runtime of every handler (in ms). + histogram::Histogram time_hist_ GUARDED_BY(mu_); + std::vector inter_op_start_ GUARDED_BY(mu_); + std::vector inter_op_limit_ GUARDED_BY(mu_); + int64 iterations_ GUARDED_BY(mu_); + condition_variable one_handler_free_; + mutex mu_; +}; + +void RunHandlerPool::Impl::RecomputePoolStatsLocked() { + int num_active_requests = sorted_active_handlers_.size(); + if (num_active_requests == 0) return; + + int num_threads = inter_op_thread_pool_->NumThreads(); + + inter_op_start_.resize(num_active_requests); + inter_op_limit_.resize(num_active_requests); + + const int kMinThreadsPerRequest = 3; + ComputeInterOpSchedulingRanges(num_active_requests, num_threads, + kMinThreadsPerRequest, &inter_op_start_, + &inter_op_limit_); + + for (int i = 0; i < num_active_requests; ++i) { + sorted_active_handlers_[i]->set_inter_op_scheduling_range( + inter_op_start_[i], inter_op_limit_[i]); + } + + if (iterations_++ % 5000 == 0 && VLOG_IS_ON(1)) { + VLOG(1) << "Printing time histogram: " << time_hist_.ToString(); + VLOG(1) << "Active session runs: " << num_active_requests; + uint64 now = tensorflow::Env::Default()->NowMicros(); + string ranges_str = ""; + string times_str = ""; + for (int i = 0; i < num_active_requests; ++i) { + if (i > 0) { + times_str += " "; + ranges_str += " "; + } + + times_str += strings::StrCat( + (now - sorted_active_handlers_[i]->start_time_us()) / 1000.0, " ms."); + ranges_str += strings::StrCat("[", inter_op_start_[i], ", ", + inter_op_limit_[i], ")"); + } + VLOG(1) << "Elapsed times are: " << times_str; + VLOG(1) << "Ranges are: " << ranges_str; + } +} + +void 
RunHandler::Impl::ScheduleInterOpClosure(std::function fn) { + std::uint_fast32_t start = 0, limit = 0; + DecodePartition(inter_op_scheduling_range(), &start, &limit); + pool_impl_->inter_op_thread_pool()->Schedule(std::move(fn)); +} + +void RunHandler::Impl::Reset() { + set_inter_op_scheduling_range( + 0, pool_impl_->inter_op_thread_pool()->NumThreads()); + start_time_us_ = tensorflow::Env::Default()->NowMicros(); +} + +RunHandlerPool::RunHandlerPool(int num_inter_op_threads) + : impl_(new Impl(num_inter_op_threads)) {} + +RunHandlerPool::~RunHandlerPool() {} + +std::unique_ptr RunHandlerPool::Get() { return impl_->Get(); } + +RunHandler::RunHandler(Impl* impl) : impl_(impl) {} + +void RunHandler::ScheduleInterOpClosure(std::function fn) { + impl_->ScheduleInterOpClosure(std::move(fn)); +} + +RunHandler::~RunHandler() { impl_->pool_impl()->ReleaseHandler(impl_); } +} // namespace tensorflow diff --git a/tensorflow/core/framework/run_handler.h b/tensorflow/core/framework/run_handler.h new file mode 100644 index 0000000000..72fa6301b4 --- /dev/null +++ b/tensorflow/core/framework/run_handler.h @@ -0,0 +1,95 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ +#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ + +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/protobuf/config.pb.h" + +namespace tensorflow { + +class RunHandler; + +// RunHandlerPool is a fixed size pool of pre-allocated RunHandlers +// that can be used for tracking inter-op work for a given Session::Run(). +// RunHandler(s) in the pool are initially 'inactive'. A RunHandler becomes +// 'active' when its unique_ptr is returned by Get() and is being used by a +// client. It becomes 'inactive' once more when its unique_ptr gets destroyed. +// +// Expected usage: +// +// * Create a single RunHandlerPool (say run_handler_pool_). +// +// * When a Session::Run() is invoked, obtain a handler by: +// auto handler = run_handler_pool_->Get(); +// +// * Use handler for scheduling all inter-op work by: +// handler->ScheduleInterOpClosure(closure); +// +// This class is thread safe. +class RunHandlerPool { + public: + explicit RunHandlerPool(int num_inter_op_threads); + ~RunHandlerPool(); + + // Returns an inactive RunHandler from the pool. + // + // RunHandlers in RunHandlerPool are initially 'inactive'. + // A RunHandler becomes 'active' when its unique_ptr is returned by Get() + // and is being used by a client. It becomes 'inactive' once more when the + // unique_ptr is destroyed. + // + // Will block unless there is an inactive handler. + std::unique_ptr Get(); + + private: + class Impl; + friend class RunHandler; + + std::unique_ptr impl_; +}; + +// RunHandler can be used to schedule inter-op closures to run on a global pool +// shared across all Session::Run(s). +// +// It can only be created via RunHandlerPool::Get().
+// +// This class can be used instead of directly scheduling closures on a global +// pool since it maintains a global view across all sessions and optimizes pool +// scheduling to improve (median and tail) latency. +// +// This class is thread safe. +class RunHandler { + public: + void ScheduleInterOpClosure(std::function fn); + + ~RunHandler(); + + private: + class Impl; + friend class RunHandlerPool::Impl; + + explicit RunHandler(Impl* impl); + + Impl* impl_; // NOT OWNED. +}; + +} // end namespace tensorflow. + +#endif // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ diff --git a/tensorflow/core/framework/run_handler_util.cc b/tensorflow/core/framework/run_handler_util.cc new file mode 100644 index 0000000000..3087998c69 --- /dev/null +++ b/tensorflow/core/framework/run_handler_util.cc @@ -0,0 +1,57 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/run_handler_util.h" + +#include +#include +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads, + int min_threads_per_request, + std::vector* start_vec, + std::vector* end_vec) { + // Each request is expected to have weight W[i] = num_active_requests - i. + // Therefore, total_weight = sum of all request weights. 
+ float total_weight = 0.5f * num_active_requests * (num_active_requests + 1); + float demand_factor = static_cast(num_threads) / total_weight; + float last_cumulative_weight = 0.0; + min_threads_per_request = std::max(1, min_threads_per_request); + for (int i = 0; i != num_active_requests; i++) { + float cumulative_weight = + static_cast(i + 1) * + (num_active_requests - static_cast(i) * 0.5f); + float weight = cumulative_weight - last_cumulative_weight; + // Quantize thread_demand by rounding up, and also satisfying + // `min_threads_per_request` constraint. + // Note: We subtract a small epsilon (0.00001) to prevent ceil(..) from + // rounding weights like 4.0 to 5. + int demand = + std::max(min_threads_per_request, + static_cast(ceil(weight * demand_factor - 0.00001f))); + // For the quantized range [start, end); compute the floor of real start, + // and expand downwards from there with length `demand` and adjust for + // boundary conditions. + int start = last_cumulative_weight * demand_factor; + int end = std::min(num_threads, start + demand); + start = std::max(0, std::min(start, end - demand)); + start_vec->at(i) = start; + end_vec->at(i) = end; + last_cumulative_weight = cumulative_weight; + } +} +} // namespace tensorflow diff --git a/tensorflow/core/framework/run_handler_util.h b/tensorflow/core/framework/run_handler_util.h new file mode 100644 index 0000000000..c0c36aeccb --- /dev/null +++ b/tensorflow/core/framework/run_handler_util.h @@ -0,0 +1,43 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ +#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ + +#include +#include + +namespace tensorflow { + +// Assign thread ranges to requests. +// Requests are numbered 0...num_active_requests-1, and +// threads are numbered 0...num_threads-1. +// On return, the range start_vec->at(i)...end_vec->at(i)-1 +// indicates the subrange of the threads available to request i. +// The ranges given to different requests may overlap. +// Lower numbered requests will tend to be assigned more threads. +// Thus, a client might associate older requests with lower +// array indices so they receive access to more threads. +// However, the routine ensures that each request is given access +// to at least min(min_threads_per_request, num_threads) threads. +// Every thread will be assigned to at least one request range, +// assuming there is at least one request. +void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads, + int min_threads_per_request, + std::vector* start_vec, + std::vector* end_vec); + +} // end namespace tensorflow +#endif // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ diff --git a/tensorflow/core/framework/run_handler_util_test.cc b/tensorflow/core/framework/run_handler_util_test.cc new file mode 100644 index 0000000000..a1928c132b --- /dev/null +++ b/tensorflow/core/framework/run_handler_util_test.cc @@ -0,0 +1,93 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/framework/run_handler_util.h" + +#include +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +namespace tensorflow { +namespace { + +void VerifyFunction(int num_active_requests, int num_threads, + int min_threads_per_request, bool print_stats = false) { + if (print_stats) { + LOG(INFO) << "Test case# num_active_requests: " << num_active_requests + << " num_threads: " << num_threads + << " min_threads: " << min_threads_per_request; + } + std::vector start(num_active_requests); + std::vector end(num_active_requests); + + ComputeInterOpSchedulingRanges(num_active_requests, num_threads, + min_threads_per_request, &start, &end); + string range_str = ""; + for (int i = 0; i < num_active_requests; ++i) { + if (i > 0) range_str += " "; + range_str += strings::StrCat("[", start[i], ", ", end[i], ")"); + + ASSERT_GE(start[i], 0) << range_str; + ASSERT_LE(end[i], num_threads) << range_str; + if (i > 0) { + // Due to linearly decreasing demand, #threads(i - 1) >= #threads(i) + ASSERT_GE(end[i - 1] - start[i - 1], end[i] - start[i]) << range_str; + // No missing threads. + ASSERT_GE(end[i - 1], start[i]) << range_str; + } + // Each interval is at least of size 'min_threads_per_request'. 
+ ASSERT_GE((end[i] - start[i]), min_threads_per_request) << range_str; + // Verify that assigned (quantized) threads is not overly estimated + // from real demand, when the demand is high (>= + // min_threads_per_request). + float entry_weight = num_active_requests - i; + float total_weight = 0.5f * num_active_requests * (num_active_requests + 1); + float thread_demand = (entry_weight * num_threads) / total_weight; + if (thread_demand > min_threads_per_request) { + // We expect some over-estimation of threads due to quantization, + // but we hope it's not more than 1 extra thread. + ASSERT_NEAR(end[i] - start[i], thread_demand, 1.0) + << "Ranges: " << range_str << " thread_demand: " << thread_demand + << " i: " << i; + } + } + ASSERT_EQ(end[num_active_requests - 1], num_threads); + ASSERT_EQ(start[0], 0); + if (print_stats) { + LOG(INFO) << "Assigned ranges: " << range_str; + } +} + +TEST(RunHandlerUtilTest, TestComputeInterOpSchedulingRanges) { + const int kMinThreadsPerRequestBound = 12; + const int kMaxActiveRequests = 128; + const int kMaxThreads = 128; + + for (int min_threads_per_request = 1; + min_threads_per_request <= kMinThreadsPerRequestBound; + ++min_threads_per_request) { + for (int num_active_requests = 1; num_active_requests <= kMaxActiveRequests; + ++num_active_requests) { + for (int num_threads = min_threads_per_request; + num_threads <= kMaxThreads; ++num_threads) { + VerifyFunction(num_active_requests, num_threads, + min_threads_per_request); + } + } + } +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 85cd02350a..104ab039cb 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -453,6 +453,11 @@ message RunOptions { // same group_key value (in a distributed computation where tasks // run disjoint graphs). 
int64 collective_graph_key = 1; + // If true, then operations (using the inter-op pool) across all + // session::run() calls will be centrally scheduled, optimizing for (median + // and tail) latency. + // Consider using this option for CPU-bound workloads like inference. + bool use_run_handler_pool = 2; }; Experimental experimental = 8; diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt index 537e73aa89..47b5b56faf 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt @@ -8,5 +8,11 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } + field { + name: "use_run_handler_pool" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } } } diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt index cec04a2bf0..c0c2e7b9f8 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt @@ -55,6 +55,12 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } + field { + name: "use_run_handler_pool" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } } enum_type { name: "TraceLevel" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt index 537e73aa89..47b5b56faf 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt @@ -8,5 +8,11 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } + field { + name: "use_run_handler_pool" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt index cec04a2bf0..c0c2e7b9f8 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt @@ -55,6 +55,12 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } + field { + name: "use_run_handler_pool" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_BOOL + } } enum_type { name: "TraceLevel" -- GitLab From 1084594657a5d139102ac794f84d1427a710e39a Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 27 Sep 2018 12:51:52 -0700 Subject: [PATCH 091/570] TFLite: Rename ResetVariableTensorsToZero -> ResetVariableTensors PiperOrigin-RevId: 214820383 --- .../contrib/lite/experimental/c/c_api_experimental.cc | 5 ++--- .../contrib/lite/experimental/c/c_api_experimental.h | 2 +- .../lite/experimental/c/c_api_experimental_test.cc | 2 +- tensorflow/contrib/lite/interpreter.cc | 9 ++++----- tensorflow/contrib/lite/interpreter.h | 7 +++++-- tensorflow/contrib/lite/kernels/test_util.cc | 2 +- tensorflow/contrib/lite/python/interpreter.py | 4 ++-- .../python/interpreter_wrapper/interpreter_wrapper.cc | 4 ++-- .../python/interpreter_wrapper/interpreter_wrapper.h | 2 +- tensorflow/contrib/lite/testing/tflite_driver.cc | 2 +- 10 files changed, 20 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc index 0f16595811..29f8701f53 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.cc @@ -21,9 +21,8 @@ limitations under the License. 
extern "C" { #endif // __cplusplus -TFL_Status TFL_InterpreterResetVariableTensorsToZero( - TFL_Interpreter* interpreter) { - return interpreter->impl->ResetVariableTensorsToZero(); +TFL_Status TFL_InterpreterResetVariableTensors(TFL_Interpreter* interpreter) { + return interpreter->impl->ResetVariableTensors(); } void TFL_InterpreterOptionsAddBuiltinOp(TFL_InterpreterOptions* options, diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h index b8de7b9964..fca5d92f77 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental.h +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental.h @@ -25,7 +25,7 @@ extern "C" { typedef TfLiteBuiltinOperator TFL_BuiltinOperator; // Resets all variable tensors to zero. -TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensorsToZero( +TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensors( TFL_Interpreter* interpreter); // Adds an op registration for a builtin operator. 
diff --git a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc index d86ad00d6d..1b1bedb754 100644 --- a/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc +++ b/tensorflow/contrib/lite/experimental/c/c_api_experimental_test.cc @@ -44,7 +44,7 @@ TEST(CApiExperimentalSimple, Smoke) { TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options); ASSERT_NE(interpreter, nullptr); ASSERT_EQ(TFL_InterpreterAllocateTensors(interpreter), kTfLiteOk); - EXPECT_EQ(TFL_InterpreterResetVariableTensorsToZero(interpreter), kTfLiteOk); + EXPECT_EQ(TFL_InterpreterResetVariableTensors(interpreter), kTfLiteOk); EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk); TFL_DeleteInterpreter(interpreter); diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 2657bcd42b..88e41ffc55 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -451,16 +451,15 @@ TfLiteStatus Interpreter::AllocateTensors() { // Reset the variable tensors to zero after (re)allocating the tensors. // Developers shouldn't rely on the side effect of this function to reset - // variable tesnsors. They should call `ResetVariableTensorsToZero` directly + // variable tesnsors. They should call `ResetVariableTensors` directly // instead. - ResetVariableTensorsToZero(); + ResetVariableTensors(); return kTfLiteOk; } -// TODO(ycling): Consider to provide other functions to initialize variable -// tensors to non-zero values. -TfLiteStatus Interpreter::ResetVariableTensorsToZero() { +// TODO(ycling): Support non-zero default values. 
+TfLiteStatus Interpreter::ResetVariableTensors() { for (auto& tensor : tensors_) { if (!tensor.is_variable) { continue; diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index aa2bc4def6..7ef736d01b 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -421,9 +421,12 @@ class Interpreter { allow_buffer_handle_output_ = allow_buffer_handle_output; } - // Reset all variable tensors to zero. + // Reset all variable tensors to the default value. + // If a variable tensor doesn't have a buffer, reset it to zero. + // TODO(b/115961645): Implement - If a variable tensor has a buffer, reset it + // to the value of the buffer. // WARNING: This is an experimental API and subject to change. - TfLiteStatus ResetVariableTensorsToZero(); + TfLiteStatus ResetVariableTensors(); // Retrieve an operator's description of its work, for profiling purposes. const char* OpProfilingString(const TfLiteRegistration& op_reg, diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 0fdb0a3935..05a7c23ba1 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -122,7 +122,7 @@ void SingleOpModel::BuildInterpreter(std::vector> input_shapes, CHECK(interpreter_->AllocateTensors() == kTfLiteOk) << "Cannot allocate tensors"; - interpreter_->ResetVariableTensorsToZero(); + interpreter_->ResetVariableTensors(); } void SingleOpModel::Invoke() { CHECK(interpreter_->Invoke() == kTfLiteOk); } diff --git a/tensorflow/contrib/lite/python/interpreter.py b/tensorflow/contrib/lite/python/interpreter.py index 1be61fe053..5700bf7892 100644 --- a/tensorflow/contrib/lite/python/interpreter.py +++ b/tensorflow/contrib/lite/python/interpreter.py @@ -253,5 +253,5 @@ class Interpreter(object): self._ensure_safe() self._interpreter.Invoke() - def reset_all_variables_to_zero(self): - return 
self._interpreter.ResetVariableTensorsToZero() + def reset_all_variables(self): + return self._interpreter.ResetVariableTensors() diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc index 9ab05f3068..418f19a179 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.cc @@ -466,9 +466,9 @@ InterpreterWrapper* InterpreterWrapper::CreateWrapperCPPFromBuffer( error_msg); } -PyObject* InterpreterWrapper::ResetVariableTensorsToZero() { +PyObject* InterpreterWrapper::ResetVariableTensors() { TFLITE_PY_ENSURE_VALID_INTERPRETER(); - TFLITE_PY_CHECK(interpreter_->ResetVariableTensorsToZero()); + TFLITE_PY_CHECK(interpreter_->ResetVariableTensors()); Py_RETURN_NONE; } diff --git a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h index 641dd93db5..f5ca81e62a 100644 --- a/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h +++ b/tensorflow/contrib/lite/python/interpreter_wrapper/interpreter_wrapper.h @@ -65,7 +65,7 @@ class InterpreterWrapper { PyObject* TensorQuantization(int i) const; PyObject* SetTensor(int i, PyObject* value); PyObject* GetTensor(int i) const; - PyObject* ResetVariableTensorsToZero(); + PyObject* ResetVariableTensors(); // Returns a reference to tensor index i as a numpy array. The base_object // should be the interpreter object providing the memory. 
diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 1836eb53b9..17aa8cb293 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -301,7 +301,7 @@ bool TfLiteDriver::CheckResults() { } void TfLiteDriver::ResetLSTMStateTensors() { - interpreter_->ResetVariableTensorsToZero(); + interpreter_->ResetVariableTensors(); } } // namespace testing -- GitLab From 4cedc8b6e738b7a188c9c091cf667bacafae44b7 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 27 Sep 2018 13:18:33 -0700 Subject: [PATCH 092/570] Updating the V2 variables API. PiperOrigin-RevId: 214824023 --- .../compiler/aot/tests/make_test_graphs.py | 4 +- tensorflow/compiler/tests/lstm.py | 2 +- .../dnn_tree_combined_estimator_test.py | 2 +- .../python/external_regret_optimizer.py | 4 +- .../python/swap_regret_optimizer.py | 9 +- .../copy_graph/python/util/copy_elements.py | 6 +- .../copy_graph/python/util/copy_test.py | 4 +- .../python/kernel_tests/iterator_ops_test.py | 2 +- .../python/kernel_tests/moving_stats_test.py | 6 +- .../estimator/python/estimator/hooks_test.py | 2 +- .../framework/python/ops/variables_test.py | 28 +- .../graph_editor/tests/transform_test.py | 2 +- .../layers/python/layers/layers_test.py | 4 +- .../learn/python/learn/graph_actions_test.py | 12 +- .../learn/python/learn/monitors_test.py | 10 +- .../python/kernel_tests/sdca_ops_test.py | 8 +- .../metrics/python/ops/metric_ops_test.py | 19 +- .../contrib/model_pruning/python/pruning.py | 3 +- .../model_pruning/python/pruning_test.py | 22 +- .../opt/python/training/addsign_test.py | 12 +- .../drop_stale_gradient_optimizer_test.py | 4 +- .../training/external_optimizer_test.py | 22 +- .../training/model_average_optimizer_test.py | 3 +- .../opt/python/training/powersign_test.py | 12 +- .../rnn/python/kernel_tests/core_rnn_test.py | 4 +- .../contrib/session_bundle/exporter_test.py | 6 +- 
.../kernel_tests/scatter_add_ndim_op_test.py | 14 +- .../tensorrt/python/trt_convert_test.py | 2 +- .../python/training/device_setter_test.py | 8 +- tensorflow/python/client/session_test.py | 2 +- .../python/debug/cli/analyzer_cli_test.py | 20 +- .../python/debug/cli/stepper_cli_test.py | 4 +- .../python/debug/lib/debug_utils_test.py | 4 +- .../debug/lib/dist_session_debug_grpc_test.py | 4 +- .../python/debug/lib/grpc_large_data_test.py | 12 +- .../debug/lib/session_debug_file_test.py | 4 +- .../debug/lib/session_debug_grpc_test.py | 46 +-- .../python/debug/lib/session_debug_testlib.py | 90 ++--- tensorflow/python/debug/lib/stepper_test.py | 14 +- .../debug/wrappers/dumping_wrapper_test.py | 2 +- .../debug/wrappers/local_cli_wrapper_test.py | 14 +- tensorflow/python/estimator/estimator_test.py | 56 +-- tensorflow/python/framework/function_test.py | 2 +- .../python/framework/graph_util_test.py | 8 +- tensorflow/python/framework/subscribe_test.py | 4 +- tensorflow/python/grappler/item_test.py | 2 +- .../python/grappler/memory_optimizer_test.py | 10 +- .../python/grappler/tf_optimizer_test.py | 2 +- tensorflow/python/keras/engine/base_layer.py | 4 +- .../python/kernel_tests/array_ops_test.py | 4 +- .../kernel_tests/control_flow_ops_py_test.py | 56 +-- .../kernel_tests/dense_update_ops_test.py | 6 +- .../kernel_tests/identity_op_py_test.py | 2 +- .../resource_variable_ops_test.py | 2 +- .../kernel_tests/scatter_nd_ops_test.py | 4 +- .../python/kernel_tests/scatter_ops_test.py | 4 +- .../kernel_tests/variable_scope_test.py | 4 +- .../python/kernel_tests/variables_test.py | 36 +- tensorflow/python/ops/gradients_test.py | 2 +- tensorflow/python/ops/matmul_benchmark.py | 8 +- tensorflow/python/ops/variable_scope.py | 117 ++++++- tensorflow/python/ops/variables.py | 323 +++++++++++++++--- tensorflow/python/saved_model/loader_test.py | 14 +- .../python/saved_model/saved_model_test.py | 56 +-- tensorflow/python/tools/freeze_graph_test.py | 6 +- 
.../python/training/checkpointable/util.py | 2 +- .../training/learning_rate_decay_test.py | 4 +- .../python/training/monitored_session_test.py | 28 +- .../python/training/quantize_training_test.py | 3 +- .../python/training/queue_runner_test.py | 22 +- tensorflow/python/training/saver_test.py | 217 ++++++------ ...server_lib_same_variables_no_clear_test.py | 4 +- tensorflow/python/training/server_lib_test.py | 18 +- .../python/training/session_manager_test.py | 98 +++--- tensorflow/python/training/supervisor_test.py | 52 +-- .../training/sync_replicas_optimizer_test.py | 17 +- .../python/training/training_ops_test.py | 32 +- .../python/training/training_util_test.py | 4 +- .../api/golden/v1/tensorflow.-variable.pbtxt | 1 + .../tools/api/golden/v1/tensorflow.pbtxt | 4 + .../v2/tensorflow.-variable-scope.pbtxt | 105 ------ ...ensorflow.-variable.-save-slice-info.pbtxt | 17 - .../api/golden/v2/tensorflow.-variable.pbtxt | 130 ------- .../golden/v2/tensorflow.initializers.pbtxt | 12 - .../tools/api/golden/v2/tensorflow.pbtxt | 92 ----- .../golden/v2/tensorflow.variable_scope.pbtxt | 9 - 86 files changed, 1015 insertions(+), 1040 deletions(-) delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt delete mode 100644 tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt diff --git a/tensorflow/compiler/aot/tests/make_test_graphs.py b/tensorflow/compiler/aot/tests/make_test_graphs.py index de135d7a23..64b861a730 100644 --- a/tensorflow/compiler/aot/tests/make_test_graphs.py +++ b/tensorflow/compiler/aot/tests/make_test_graphs.py @@ -47,7 +47,7 @@ def tfadd(_): def tfadd_with_ckpt(out_dir): x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = variables.Variable(constant_op.constant([0]), name='y_saved') + y = variables.VariableV1(constant_op.constant([0]), 
name='y_saved') math_ops.add(x, y, name='x_y_sum') init_op = variables.initialize_all_variables() @@ -62,7 +62,7 @@ def tfadd_with_ckpt(out_dir): def tfadd_with_ckpt_saver(out_dir): x = array_ops.placeholder(dtypes.int32, name='x_hold') - y = variables.Variable(constant_op.constant([0]), name='y_saved') + y = variables.VariableV1(constant_op.constant([0]), name='y_saved') math_ops.add(x, y, name='x_y_sum') init_op = variables.initialize_all_variables() diff --git a/tensorflow/compiler/tests/lstm.py b/tensorflow/compiler/tests/lstm.py index 43c469d032..73b3638e80 100644 --- a/tensorflow/compiler/tests/lstm.py +++ b/tensorflow/compiler/tests/lstm.py @@ -117,7 +117,7 @@ def LSTMLayer(cell_name, weights, m, c, x_seq, pad_seq): def RandomVar(shape, name=None): """Returns a variable of the given shape initialized to random values.""" - return variables.Variable( + return variables.VariableV1( random_ops.random_uniform(shape), dtype=dtypes.float32, name=name) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py index 839eedd3a8..04baa329a0 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py @@ -189,7 +189,7 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase): # Train for a few steps. 
est.train(input_fn=_train_input_fn, steps=1000) # 10 steps for dnn, 3 for 1 tree of depth 3 + 1 after the tree finished - self._assert_checkpoint(est.model_dir, global_step=14) + self._assert_checkpoint(est.model_dir, global_step=15) res = est.evaluate(input_fn=_eval_input_fn, steps=1) self.assertLess(0.5, res["auc"]) est.predict(input_fn=_eval_input_fn) diff --git a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py index d1af15f7e4..67f8ac2b93 100644 --- a/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py +++ b/tensorflow/contrib/constrained_optimization/python/external_regret_optimizer.py @@ -102,9 +102,9 @@ def _project_multipliers_wrt_euclidean_norm(multipliers, radius): 0.0, (radius - standard_ops.reduce_sum(multipliers)) / standard_ops.maximum( 1.0, standard_ops.reduce_sum(inactive))) - multipliers += scale * inactive + multipliers = multipliers + (scale * inactive) new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype) - multipliers *= new_inactive + multipliers = multipliers * new_inactive return (iteration, multipliers, new_inactive, inactive) iteration = standard_ops.constant(0) diff --git a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py index 2c673d9347..a6cb1f62f0 100644 --- a/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py +++ b/tensorflow/contrib/constrained_optimization/python/swap_regret_optimizer.py @@ -175,9 +175,9 @@ def _project_stochastic_matrix_wrt_euclidean_norm(matrix): scale = (1.0 - standard_ops.reduce_sum( matrix, axis=0, keepdims=True)) / standard_ops.maximum( 1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True)) - matrix += scale * inactive + matrix = matrix + (scale * inactive) new_inactive = standard_ops.cast(matrix > 0, matrix.dtype) - 
matrix *= new_inactive + matrix = matrix * new_inactive return (iteration, matrix, new_inactive, inactive) iteration = standard_ops.constant(0) @@ -210,8 +210,9 @@ def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix): # For numerical reasons, make sure that the largest matrix element is zero # before exponentiating. - log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True) - log_matrix -= standard_ops.log( + log_matrix = log_matrix - standard_ops.reduce_max( + log_matrix, axis=0, keepdims=True) + log_matrix = log_matrix - standard_ops.log( standard_ops.reduce_sum( standard_ops.exp(log_matrix), axis=0, keepdims=True)) return log_matrix diff --git a/tensorflow/contrib/copy_graph/python/util/copy_elements.py b/tensorflow/contrib/copy_graph/python/util/copy_elements.py index 6c9ab6aeb8..9c5871da34 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_elements.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_elements.py @@ -31,7 +31,7 @@ from __future__ import division from __future__ import print_function from copy import deepcopy -from tensorflow.python.ops.variables import Variable +from tensorflow.python.ops.variables import VariableV1 from tensorflow.python.client.session import Session from tensorflow.python.framework import ops @@ -55,7 +55,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''): TypeError: If `org_instance` is not a `Variable`. 
""" - if not isinstance(org_instance, Variable): + if not isinstance(org_instance, VariableV1): raise TypeError(str(org_instance) + ' is not a Variable') #The name of the new variable @@ -88,7 +88,7 @@ def copy_variable_to_graph(org_instance, to_graph, scope=''): #Initialize the new variable with to_graph.as_default(): - new_var = Variable( + new_var = VariableV1( init_value, trainable, name=new_name, diff --git a/tensorflow/contrib/copy_graph/python/util/copy_test.py b/tensorflow/contrib/copy_graph/python/util/copy_test.py index 05744bec4e..ba97c78456 100644 --- a/tensorflow/contrib/copy_graph/python/util/copy_test.py +++ b/tensorflow/contrib/copy_graph/python/util/copy_test.py @@ -36,7 +36,7 @@ class CopyVariablesTest(test.TestCase): with graph1.as_default(): #Define a Variable in graph1 - some_var = variables.Variable(2) + some_var = variables.VariableV1(2) #Initialize session sess1 = session_lib.Session() #Initialize the Variable @@ -72,7 +72,7 @@ class CopyOpsTest(test.TestCase): with graph1.as_default(): #Initialize a basic expression y = ax + b x = array_ops.placeholder("float") - a = variables.Variable(3.0) + a = variables.VariableV1(3.0) b = constant_op.constant(4.0) ax = math_ops.multiply(x, a) y = math_ops.add(ax, b) diff --git a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py index 704c0d1eb2..7e2326bd17 100644 --- a/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/iterator_ops_test.py @@ -42,7 +42,7 @@ class CheckpointInputPipelineHookTest(test.TestCase): del config global_step = training_util.get_or_create_global_step() update_global_step_op = global_step.assign_add(1) - latest_feature = variables.Variable( + latest_feature = variables.VariableV1( 0, name='latest_feature', dtype=dtypes.int64) store_latest_feature_op = latest_feature.assign(features) ops.add_to_collection('my_vars', global_step) diff 
--git a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py index 3c988dad8a..be7c756bea 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/moving_stats_test.py @@ -38,8 +38,8 @@ class MovingReduceMeanVarianceTest(test.TestCase): true_stddev = np.array([[1.1, 0.5]]) with self.cached_session() as sess: # Start "x" out with this mean. - mean_var = variables.Variable(array_ops.zeros_like(true_mean)) - variance_var = variables.Variable(array_ops.ones_like(true_stddev)) + mean_var = variables.VariableV1(array_ops.zeros_like(true_mean)) + variance_var = variables.VariableV1(array_ops.ones_like(true_stddev)) x = random_ops.random_normal(shape, dtype=np.float64, seed=0) x = true_stddev * x + true_mean ema, emv = moving_stats.assign_moving_mean_variance( @@ -115,7 +115,7 @@ class MovingLogExponentialMovingMeanExpTest(test.TestCase): # Start "x" out with this mean. 
x = random_ops.random_normal(shape, dtype=np.float64, seed=0) x = true_stddev * x + true_mean - log_mean_exp_var = variables.Variable(array_ops.zeros_like(true_mean)) + log_mean_exp_var = variables.VariableV1(array_ops.zeros_like(true_mean)) variables.global_variables_initializer().run() log_mean_exp = moving_stats.assign_log_moving_mean_exp( log_mean_exp_var, x, decay=decay) diff --git a/tensorflow/contrib/estimator/python/estimator/hooks_test.py b/tensorflow/contrib/estimator/python/estimator/hooks_test.py index c6c6cad95a..62ffad56da 100644 --- a/tensorflow/contrib/estimator/python/estimator/hooks_test.py +++ b/tensorflow/contrib/estimator/python/estimator/hooks_test.py @@ -294,7 +294,7 @@ class InMemoryEvaluatorHookTest(test.TestCase): def model_fn(features, labels, mode): _, _ = features, labels - w = variables.Variable( + w = variables.VariableV1( initial_value=[0.], trainable=False, collections=[ops.GraphKeys.SAVEABLE_OBJECTS]) diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py index f9b0efd1da..c223df5b6e 100644 --- a/tensorflow/contrib/framework/python/ops/variables_test.py +++ b/tensorflow/contrib/framework/python/ops/variables_test.py @@ -192,7 +192,7 @@ class GlobalStepTest(test.TestCase): def test_invalid_dtype(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - variables_lib.Variable( + variables_lib.VariableV1( 0.0, trainable=False, dtype=dtypes.float32, @@ -205,7 +205,7 @@ class GlobalStepTest(test.TestCase): def test_invalid_shape(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - variables_lib.Variable( + variables_lib.VariableV1( [0], trainable=False, dtype=dtypes.int32, @@ -229,7 +229,7 @@ class GlobalStepTest(test.TestCase): def test_get_global_step(self): with ops.Graph().as_default() as g: self.assertEquals(None, variables_lib2.get_global_step()) - 
variables_lib.Variable( + variables_lib.VariableV1( 0, trainable=False, dtype=dtypes.int32, @@ -607,10 +607,10 @@ class ModelVariablesTest(test.TestCase): with self.cached_session(): with variable_scope.variable_scope('A'): variables_lib2.local_variable([5]) - a = variables_lib.Variable([5]) + a = variables_lib.VariableV1([5]) with variable_scope.variable_scope('B'): variables_lib2.local_variable([5]) - b = variables_lib.Variable([5]) + b = variables_lib.VariableV1([5]) self.assertEquals([a], variables_lib2.get_trainable_variables('A')) self.assertEquals([b], variables_lib2.get_trainable_variables('B')) @@ -953,7 +953,7 @@ class AssignFromCheckpointTest(test.TestCase): # Create a set of variables to save in the checkpoint. for var_name in var_names_to_values: var_value = var_names_to_values[var_name] - var_list.append(variables_lib.Variable(var_value, name=var_name)) + var_list.append(variables_lib.VariableV1(var_value, name=var_name)) saver = saver_lib.Saver(var_list) init_op = variables_lib.variables_initializer(var_list) sess.run(init_op) @@ -1106,7 +1106,7 @@ class AssignFromCheckpointFnTest(test.TestCase): # Create a set of variables to save in the checkpoint. 
for var_name in var_names_to_values: var_value = var_names_to_values[var_name] - var_list.append(variables_lib.Variable(var_value, name=var_name)) + var_list.append(variables_lib.VariableV1(var_value, name=var_name)) saver = saver_lib.Saver(var_list) init_op = variables_lib.variables_initializer(var_list) sess.run(init_op) @@ -1297,7 +1297,7 @@ class AssignFromCheckpointFnTest(test.TestCase): class ZeroInitializerOpTest(test.TestCase): def _testZeroInitializer(self, shape, initializer, use_init): - var = variables_lib.Variable(initializer) + var = variables_lib.VariableV1(initializer) var_zero = variables_lib2.zero_initializer(var) with self.cached_session() as sess: with self.assertRaisesOpError('Attempting to use uninitialized value'): @@ -1350,12 +1350,12 @@ class FilterVariablesTest(test.TestCase): g = ops.Graph() with g.as_default(): var_list = [] - var_list.append(variables_lib.Variable(0, name='conv1/weights')) - var_list.append(variables_lib.Variable(0, name='conv1/biases')) - var_list.append(variables_lib.Variable(0, name='conv2/weights')) - var_list.append(variables_lib.Variable(0, name='conv2/biases')) - var_list.append(variables_lib.Variable(0, name='clfs/weights')) - var_list.append(variables_lib.Variable(0, name='clfs/biases')) + var_list.append(variables_lib.VariableV1(0, name='conv1/weights')) + var_list.append(variables_lib.VariableV1(0, name='conv1/biases')) + var_list.append(variables_lib.VariableV1(0, name='conv2/weights')) + var_list.append(variables_lib.VariableV1(0, name='conv2/biases')) + var_list.append(variables_lib.VariableV1(0, name='clfs/weights')) + var_list.append(variables_lib.VariableV1(0, name='clfs/biases')) self._var_list = var_list def _test_filter_variables(self, diff --git a/tensorflow/contrib/graph_editor/tests/transform_test.py b/tensorflow/contrib/graph_editor/tests/transform_test.py index 97f38c923f..0ebcdc2688 100644 --- a/tensorflow/contrib/graph_editor/tests/transform_test.py +++ 
b/tensorflow/contrib/graph_editor/tests/transform_test.py @@ -214,7 +214,7 @@ class TransformTest(test.TestCase): def test_graph_replace_gradients(self): ops.reset_default_graph() - w = variables.Variable(0.0, name="w") + w = variables.VariableV1(0.0, name="w") y = math_ops.multiply(math_ops.multiply(w, w, name="mul1"), w, name="mul2") g = gradients_impl.gradients(y, w, name="grad")[0] diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 85af9de4e4..3b7ae72e9c 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -2360,7 +2360,7 @@ class BatchNormTest(test.TestCase): batch_size * height * width, expected_var) images = constant_op.constant( image_values, shape=image_shape, dtype=dtypes.float32) - is_training = variables_lib.Variable(True) + is_training = variables_lib.VariableV1(True) output = _layers.batch_norm( images, decay=0.1, @@ -2507,7 +2507,7 @@ class BatchNormTest(test.TestCase): batch_size * height * width, expected_var) images = constant_op.constant( image_values, shape=image_shape, dtype=dtypes.float32) - is_training = variables_lib.Variable(True) + is_training = variables_lib.VariableV1(True) output = _layers.batch_norm( images, decay=0.1, diff --git a/tensorflow/contrib/learn/python/learn/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py index 33180b778a..a160cb54a3 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions_test.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions_test.py @@ -162,9 +162,9 @@ class GraphActionsTest(test.TestCase): Tuple of 3 `Tensor` objects, 2 input and 1 output. 
""" variables_lib.create_global_step() - in0 = variables.Variable(1.0) + in0 = variables.VariableV1(1.0) in1 = variables_lib.local_variable(2.0) - fake_table = variables.Variable( + fake_table = variables.VariableV1( 3.0, trainable=False, collections=['fake_tables'], @@ -312,8 +312,8 @@ class GraphActionsTest(test.TestCase): def test_evaluate_ready_for_local_init(self): with ops.Graph().as_default() as g, self.session(g): variables_lib.create_global_step() - v = variables.Variable(1.0) - variables.Variable( + v = variables.VariableV1(1.0) + variables.VariableV1( v + 1, collections=[ops.GraphKeys.LOCAL_VARIABLES], trainable=False) ready_for_local_init_op = variables.report_uninitialized_variables( variables.global_variables()) @@ -456,9 +456,9 @@ class GraphActionsTrainTest(test.TestCase): Tuple of 3 `Tensor` objects, 2 input and 1 output. """ variables_lib.create_global_step() - in0 = variables.Variable(1.0) + in0 = variables.VariableV1(1.0) in1 = variables_lib.local_variable(2.0) - fake_table = variables.Variable( + fake_table = variables.VariableV1( 3.0, trainable=False, collections=['fake_tables'], diff --git a/tensorflow/contrib/learn/python/learn/monitors_test.py b/tensorflow/contrib/learn/python/learn/monitors_test.py index 83e48a36e7..d4a7169bb6 100644 --- a/tensorflow/contrib/learn/python/learn/monitors_test.py +++ b/tensorflow/contrib/learn/python/learn/monitors_test.py @@ -247,7 +247,7 @@ class MonitorsTest(test.TestCase): def test_logging_trainable(self): with ops.Graph().as_default() as g, self.session(g): - var = variables.Variable(constant_op.constant(42.0), name='foo') + var = variables.VariableV1(constant_op.constant(42.0), name='foo') var.initializer.run() cof = constant_op.constant(1.0) loss = math_ops.subtract( @@ -261,7 +261,7 @@ class MonitorsTest(test.TestCase): with ops.Graph().as_default() as g, self.session(g): log_dir = 'log/dir' summary_writer = testing.FakeSummaryWriter(log_dir, g) - var = variables.Variable(0.0) + var = 
variables.VariableV1(0.0) var.initializer.run() tensor = state_ops.assign_add(var, 1.0) summary_op = summary.scalar('my_summary', tensor) @@ -526,8 +526,8 @@ class MonitorsTest(test.TestCase): monitor0 = learn.monitors.GraphDump() monitor1 = learn.monitors.GraphDump() with ops.Graph().as_default() as g, self.session(g): - const_var = variables.Variable(42.0, name='my_const') - counter_var = variables.Variable(0.0, name='my_counter') + const_var = variables.VariableV1(42.0, name='my_const') + counter_var = variables.VariableV1(0.0, name='my_counter') assign_add = state_ops.assign_add(counter_var, 1.0, name='my_assign_add') variables.global_variables_initializer().run() @@ -569,7 +569,7 @@ class MonitorsTest(test.TestCase): monitor = learn.monitors.CaptureVariable( var_name='my_assign_add:0', every_n=8, first_n=2) with ops.Graph().as_default() as g, self.session(g): - var = variables.Variable(0.0, name='my_var') + var = variables.VariableV1(0.0, name='my_var') var.initializer.run() state_ops.assign_add(var, 1.0, name='my_assign_add') self._run_monitor(monitor, num_epochs=3, num_steps_per_epoch=10) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 9ecf023e03..8466dc36d1 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -125,7 +125,7 @@ def make_random_examples_and_variables_dicts(num_examples, dim, num_non_zero): ], example_ids=[str(i) for i in range(num_examples)]) - weights = variables_lib.Variable( + weights = variables_lib.VariableV1( array_ops.zeros([dim], dtype=dtypes.float32)) variables_dict = dict( sparse_features_weights=[weights], @@ -184,7 +184,7 @@ def make_dense_examples_and_variables_dicts(dense_features_values, weights, dense_tensors.append(dense_tensor) # Add variables of shape [feature_column_dimension]. 
dense_weights.append( - variables_lib.Variable( + variables_lib.VariableV1( array_ops.zeros( [dense_tensor.get_shape().as_list()[1]], dtype=dtypes.float32))) @@ -341,7 +341,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): examples = make_example_dict(example_protos, example_weights) # Explicitly make age a [1]-shaped Variable (which cannot be # partitioned), while making gender a PartitionedVariable. - age_weights = variables_lib.Variable( + age_weights = variables_lib.VariableV1( array_ops.zeros([1], dtype=dtypes.float32)) with variable_scope.variable_scope( name_or_scope=('variables/shard_{}'.format(num_shards) @@ -801,7 +801,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): labels=[1.0, 0.0]) # Replace with a variable of size 1 instead of 2. variables['dense_features_weights'] = [ - variables_lib.Variable(array_ops.zeros( + variables_lib.VariableV1(array_ops.zeros( [1], dtype=dtypes.float32)) ] options = dict( diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index 955b83b44d..fc64f343ab 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -2069,11 +2069,11 @@ class StreamingDynamicAUCTest(test.TestCase): num_batches = 100 labels = np.array([]) predictions = np.array([]) - tf_labels = variables.Variable( + tf_labels = variables.VariableV1( array_ops.ones(batch_size, dtypes_lib.int32), collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.int32) - tf_predictions = variables.Variable( + tf_predictions = variables.VariableV1( array_ops.ones(batch_size), collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.float32) @@ -2133,15 +2133,15 @@ class StreamingDynamicAUCTest(test.TestCase): labels = np.array([]) predictions = np.array([]) weights = np.array([]) - tf_labels = variables.Variable( + tf_labels = variables.VariableV1( array_ops.ones(batch_size, dtypes_lib.int32), 
collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.int32) - tf_predictions = variables.Variable( + tf_predictions = variables.VariableV1( array_ops.ones(batch_size), collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.float32) - tf_weights = variables.Variable( + tf_weights = variables.VariableV1( array_ops.ones(batch_size), collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.float32) @@ -2311,10 +2311,11 @@ class AucWithConfidenceIntervalsTest(test.TestCase): num_batches = 100 labels = np.array([]) predictions = np.array([]) - tf_labels = variables.Variable(array_ops.ones(batch_size, dtypes_lib.int32), - collections=[ops.GraphKeys.LOCAL_VARIABLES], - dtype=dtypes_lib.int32) - tf_predictions = variables.Variable( + tf_labels = variables.VariableV1( + array_ops.ones(batch_size, dtypes_lib.int32), + collections=[ops.GraphKeys.LOCAL_VARIABLES], + dtype=dtypes_lib.int32) + tf_predictions = variables.VariableV1( array_ops.ones(batch_size), collections=[ops.GraphKeys.LOCAL_VARIABLES], dtype=dtypes_lib.float32) diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index a81abac2fa..67e58ff15d 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -247,7 +247,8 @@ class Pruning(object): # Stores the tensorflow sparsity variable. 
# Built using self._setup_sparsity() or provided externally - self._sparsity = sparsity if sparsity else self._setup_sparsity() + self._sparsity = (sparsity + if sparsity is not None else self._setup_sparsity()) # List of tensorflow assignments ops for new masks and thresholds self._assign_ops = [] diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py index cd3d8e76bb..1b6da5ce2b 100644 --- a/tensorflow/contrib/model_pruning/python/pruning_test.py +++ b/tensorflow/contrib/model_pruning/python/pruning_test.py @@ -45,7 +45,7 @@ class PruningHParamsTest(test.TestCase): # Add global step variable to the graph self.global_step = training_util.get_or_create_global_step() # Add sparsity - self.sparsity = variables.Variable(0.5, name="sparsity") + self.sparsity = variables.VariableV1(0.5, name="sparsity") # Parse hparams self.pruning_hparams = pruning.get_pruning_hparams().parse( self.TEST_HPARAMS) @@ -88,7 +88,7 @@ class PruningTest(test.TestCase): width = 10 height = 20 with self.cached_session(): - weights = variables.Variable( + weights = variables.VariableV1( random_ops.random_normal([width, height], stddev=1), name="weights") masked_weights = pruning.apply_mask(weights, variable_scope.get_variable_scope()) @@ -99,10 +99,10 @@ class PruningTest(test.TestCase): def testUpdateSingleMask(self): with self.cached_session() as session: - weights = variables.Variable( + weights = variables.VariableV1( math_ops.linspace(1.0, 100.0, 100), name="weights") masked_weights = pruning.apply_mask(weights) - sparsity = variables.Variable(0.5, name="sparsity") + sparsity = variables.VariableV1(0.5, name="sparsity") p = pruning.Pruning(sparsity=sparsity) p._spec.threshold_decay = 0.0 mask_update_op = p.mask_update_op() @@ -115,8 +115,8 @@ class PruningTest(test.TestCase): def _blockMasking(self, hparams, weights, expected_mask): - threshold = variables.Variable(0.0, name="threshold") - sparsity = 
variables.Variable(0.5, name="sparsity") + threshold = variables.VariableV1(0.0, name="threshold") + sparsity = variables.VariableV1(0.5, name="sparsity") test_spec = ",".join(hparams) pruning_hparams = pruning.get_pruning_hparams().parse(test_spec) @@ -169,7 +169,7 @@ class PruningTest(test.TestCase): partitioner = partitioned_variables.variable_axis_size_partitioner(40) with self.cached_session() as session: with variable_scope.variable_scope("", partitioner=partitioner): - sparsity = variables.Variable(0.5, name="Sparsity") + sparsity = variables.VariableV1(0.5, name="Sparsity") weights = variable_scope.get_variable( "weights", initializer=math_ops.linspace(1.0, 100.0, 100)) masked_weights = pruning.apply_mask( @@ -190,10 +190,10 @@ class PruningTest(test.TestCase): ] test_spec = ",".join(param_list) pruning_hparams = pruning.get_pruning_hparams().parse(test_spec) - weights = variables.Variable( + weights = variables.VariableV1( math_ops.linspace(1.0, 100.0, 100), name="weights") masked_weights = pruning.apply_mask(weights) - sparsity = variables.Variable(0.00, name="sparsity") + sparsity = variables.VariableV1(0.00, name="sparsity") # Set up pruning p = pruning.Pruning(pruning_hparams, sparsity=sparsity) p._spec.threshold_decay = 0.0 @@ -222,11 +222,11 @@ class PruningTest(test.TestCase): pruning_hparams = pruning.get_pruning_hparams().parse(test_spec) with variable_scope.variable_scope("layer1"): - w1 = variables.Variable( + w1 = variables.VariableV1( math_ops.linspace(1.0, 100.0, 100), name="weights") _ = pruning.apply_mask(w1) with variable_scope.variable_scope("layer2"): - w2 = variables.Variable( + w2 = variables.VariableV1( math_ops.linspace(1.0, 100.0, 100), name="weights") _ = pruning.apply_mask(w2) diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py index 628a735e72..6150fa117f 100644 --- a/tensorflow/contrib/opt/python/training/addsign_test.py +++ 
b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -80,9 +80,9 @@ class AddSignTest(test.TestCase): global_step = resource_variable_ops.ResourceVariable( 0, trainable=False) else: - var0 = variables.Variable(var0_np) - var1 = variables.Variable(var1_np) - global_step = variables.Variable( + var0 = variables.VariableV1(var0_np) + var1 = variables.VariableV1(var1_np) + global_step = variables.VariableV1( 0, trainable=False) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) @@ -183,9 +183,9 @@ class AddSignTest(test.TestCase): global_step = resource_variable_ops.ResourceVariable( 0, trainable=False) else: - var0 = variables.Variable(var0_np) - var1 = variables.Variable(var1_np) - global_step = variables.Variable( + var0 = variables.VariableV1(var0_np) + var1 = variables.VariableV1(var1_np) + global_step = variables.VariableV1( 0, trainable=False) grads0_np_indices = np.array([0, 1], dtype=np.int32) grads0 = ops.IndexedSlices( diff --git a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py index 53232082e1..0a69096768 100644 --- a/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/drop_stale_gradient_optimizer_test.py @@ -61,8 +61,8 @@ def _get_workers(num_workers, staleness): graph = ops.Graph() with graph.as_default(): global_step = training_util.create_global_step() - var_0 = variables.Variable(0.0, name='v0') - var_1 = variables.Variable(1.0, name='v1') + var_0 = variables.VariableV1(0.0, name='v0') + var_1 = variables.VariableV1(1.0, name='v1') compute_gradients_queue = data_flow_ops.FIFOQueue( -1, global_step.dtype.base_dtype, shapes=(), name='compute_gradients_queue', shared_name='compute_gradients_queue') diff --git a/tensorflow/contrib/opt/python/training/external_optimizer_test.py 
b/tensorflow/contrib/opt/python/training/external_optimizer_test.py index 9997103016..70c5f8ff19 100644 --- a/tensorflow/contrib/opt/python/training/external_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/external_optimizer_test.py @@ -69,9 +69,9 @@ class TestCase(test.TestCase): class ExternalOptimizerInterfaceTest(TestCase): def test_optimize(self): - scalar = variables.Variable(random_ops.random_normal([]), 'scalar') - vector = variables.Variable(random_ops.random_normal([2]), 'vector') - matrix = variables.Variable(random_ops.random_normal([2, 3]), 'matrix') + scalar = variables.VariableV1(random_ops.random_normal([]), 'scalar') + vector = variables.VariableV1(random_ops.random_normal([2]), 'vector') + matrix = variables.VariableV1(random_ops.random_normal([2, 3]), 'matrix') minimum_location = constant_op.constant(np.arange(9), dtype=dtypes.float32) @@ -96,7 +96,7 @@ class ExternalOptimizerInterfaceTest(TestCase): def test_callbacks(self): vector_val = np.array([7., -2.], dtype=np.float32) - vector = variables.Variable(vector_val, 'vector') + vector = variables.VariableV1(vector_val, 'vector') minimum_location_val = np.arange(2) minimum_location = constant_op.constant( @@ -160,7 +160,7 @@ class ScipyOptimizerInterfaceTest(TestCase): rtol=1e-5, atol=1e-5, dimension=5): - x = variables.Variable(array_ops.zeros(dimension)) + x = variables.VariableV1(array_ops.zeros(dimension)) optimizer = external_optimizer.ScipyOptimizerInterface( self._objective(x), method=method, options=options) @@ -173,7 +173,7 @@ class ScipyOptimizerInterfaceTest(TestCase): def test_unconstrained(self): dimension = 5 - x = variables.Variable(array_ops.zeros(dimension)) + x = variables.VariableV1(array_ops.zeros(dimension)) optimizer = external_optimizer.ScipyOptimizerInterface(self._objective(x)) with self.cached_session() as sess: @@ -230,7 +230,7 @@ class ScipyOptimizerInterfaceTest(TestCase): def test_nonlinear_programming(self): vector_initial_value = [7., 7.] 
- vector = variables.Variable(vector_initial_value, 'vector') + vector = variables.VariableV1(vector_initial_value, 'vector') # Make norm as small as possible. loss = math_ops.reduce_sum(math_ops.square(vector)) @@ -249,7 +249,7 @@ class ScipyOptimizerInterfaceTest(TestCase): def test_scalar_bounds(self): vector_initial_value = [7., 7.] - vector = variables.Variable(vector_initial_value, 'vector') + vector = variables.VariableV1(vector_initial_value, 'vector') # Make norm as small as possible. loss = math_ops.reduce_sum(math_ops.square(vector)) @@ -267,7 +267,7 @@ class ScipyOptimizerInterfaceTest(TestCase): def test_vector_bounds(self): vector_initial_value = [7., 7.] - vector = variables.Variable(vector_initial_value, 'vector') + vector = variables.VariableV1(vector_initial_value, 'vector') # Make norm as small as possible. loss = math_ops.reduce_sum(math_ops.square(vector)) @@ -287,7 +287,7 @@ class ScipyOptimizerInterfaceTest(TestCase): # after running optimizer.minimize(). # Bug reference: b/64065260 vector_initial_value = [7., 7.] 
- vector = variables.Variable(vector_initial_value, 'vector') + vector = variables.VariableV1(vector_initial_value, 'vector') loss = math_ops.reduce_sum(math_ops.square(vector)) optimizer = external_optimizer.ScipyOptimizerInterface( @@ -301,7 +301,7 @@ class ScipyOptimizerInterfaceTest(TestCase): def test_callbacks(self): vector_val = np.array([7., -2.], dtype=np.float32) - vector = variables.Variable(vector_val, 'vector') + vector = variables.VariableV1(vector_val, 'vector') minimum_location_val = np.arange(2) minimum_location = constant_op.constant( diff --git a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py index b1fc50a21f..a25455e95d 100644 --- a/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py +++ b/tensorflow/contrib/opt/python/training/model_average_optimizer_test.py @@ -110,10 +110,11 @@ def _get_workers(num_workers, steps, workers): class ModelAverageOptimizerTest(test.TestCase): + def _run(self, train_op, sess): sess.run(train_op) - def test1Workers2Period(self): + def disabled_test1Workers2Period(self): num_workers = 2 steps = 2 num_ps = 1 diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py index 0bcf5d230a..1cf9901dc0 100644 --- a/tensorflow/contrib/opt/python/training/powersign_test.py +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -81,9 +81,9 @@ class PowerSignTest(test.TestCase): global_step = resource_variable_ops.ResourceVariable( 0, trainable=False) else: - var0 = variables.Variable(var0_np) - var1 = variables.Variable(var1_np) - global_step = variables.Variable( + var0 = variables.VariableV1(var0_np) + var1 = variables.VariableV1(var1_np) + global_step = variables.VariableV1( 0, trainable=False) grads0 = constant_op.constant(grads0_np) grads1 = constant_op.constant(grads1_np) @@ -188,9 +188,9 @@ class PowerSignTest(test.TestCase): 
global_step = resource_variable_ops.ResourceVariable( 0, trainable=False) else: - var0 = variables.Variable(var0_np) - var1 = variables.Variable(var1_np) - global_step = variables.Variable( + var0 = variables.VariableV1(var0_np) + var1 = variables.VariableV1(var1_np) + global_step = variables.VariableV1( 0, trainable=False) grads0_np_indices = np.array([0, 1], dtype=np.int32) grads0 = ops.IndexedSlices( diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index bf699db3ed..f31ad53d3c 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -163,8 +163,8 @@ class TestStateSaverWithCounters(TestStateSaver): def __init__(self, batch_size, state_size): super(TestStateSaverWithCounters, self).__init__(batch_size, state_size) - self._num_state_calls = variables_lib.Variable(0) - self._num_save_state_calls = variables_lib.Variable(0) + self._num_state_calls = variables_lib.VariableV1(0) + self._num_save_state_calls = variables_lib.VariableV1(0) def state(self, name): with ops_lib.control_dependencies( diff --git a/tensorflow/contrib/session_bundle/exporter_test.py b/tensorflow/contrib/session_bundle/exporter_test.py index 86df425da0..68419ffea0 100644 --- a/tensorflow/contrib/session_bundle/exporter_test.py +++ b/tensorflow/contrib/session_bundle/exporter_test.py @@ -64,10 +64,10 @@ class SaveRestoreShardedTest(test.TestCase): # v2 is an unsaved variable derived from v0 and v1. It is used to # exercise the ability to run an init op when restoring a graph. 
with sess.graph.device("/cpu:0"): - v0 = variables.Variable(10, name="v0") + v0 = variables.VariableV1(10, name="v0") with sess.graph.device("/cpu:1"): - v1 = variables.Variable(20, name="v1") - v2 = variables.Variable(1, name="v2", trainable=False, collections=[]) + v1 = variables.VariableV1(20, name="v1") + v2 = variables.VariableV1(1, name="v2", trainable=False, collections=[]) assign_v2 = state_ops.assign(v2, math_ops.add(v0, v1)) init_op = control_flow_ops.group(assign_v2, name="init_op") diff --git a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py index 1c4e18dbda..0b02bdcb50 100644 --- a/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py +++ b/tensorflow/contrib/tensor_forest/python/kernel_tests/scatter_add_ndim_op_test.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import googletest class ScatterAddNdimTest(test_util.TensorFlowTestCase): def test1dim(self): - input_data = variables.Variable( + input_data = variables.VariableV1( [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]) indices = [[1], [10]] updates = [100., 200.] @@ -40,8 +40,8 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase): input_data.eval()) def test3dim(self): - input_data = variables.Variable([[[1., 2., 3.], [4., 5., 6.]], - [[7., 8., 9.], [10., 11., 12.]]]) + input_data = variables.VariableV1([[[1., 2., 3.], [4., 5., 6.]], + [[7., 8., 9.], [10., 11., 12.]]]) indices = [[0, 0, 1], [1, 1, 2]] updates = [100., 200.] 
@@ -53,7 +53,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase): def testNoUpdates(self): init_val = [[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [10., 11., 12.]]] - input_data = variables.Variable(init_val) + input_data = variables.VariableV1(init_val) indices = [] updates = [] @@ -64,7 +64,7 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase): def testBadInput(self): init_val = [[[1., 2., 3.], [4., 5., 6.]], [[7., 8., 9.], [10., 11., 12.]]] - input_data = variables.Variable(init_val) + input_data = variables.VariableV1(init_val) indices = [[0, 0, 1], [1, 1, 2]] updates = [100.] with self.cached_session(): @@ -75,8 +75,8 @@ class ScatterAddNdimTest(test_util.TensorFlowTestCase): self.assertAllEqual(init_val, input_data.eval()) def testIncompleteIndices(self): - input_data = variables.Variable([[[1., 2., 3.], [4., 5., 6.]], - [[7., 8., 9.], [10., 11., 12.]]]) + input_data = variables.VariableV1([[[1., 2., 3.], [4., 5., 6.]], + [[7., 8., 9.], [10., 11., 12.]]]) indices = [[0, 0], [1, 1]] updates = [[100., 200., 300.], [400., 500., 600.]] diff --git a/tensorflow/contrib/tensorrt/python/trt_convert_test.py b/tensorflow/contrib/tensorrt/python/trt_convert_test.py index f3a1ef0d47..52cb0bd9f9 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert_test.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert_test.py @@ -94,7 +94,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase): with g.device("/GPU:0"): inp = array_ops.placeholder( dtype=dtypes.float32, shape=[None, 1, 1], name="input") - var = variables.Variable([[[1.0]]], dtype=dtypes.float32, name="v1") + var = variables.VariableV1([[[1.0]]], dtype=dtypes.float32, name="v1") add = inp + var.value() mul = inp * add add = mul + add diff --git a/tensorflow/contrib/training/python/training/device_setter_test.py b/tensorflow/contrib/training/python/training/device_setter_test.py index 20746d911c..3bb2dce83d 100644 --- a/tensorflow/contrib/training/python/training/device_setter_test.py +++ 
b/tensorflow/contrib/training/python/training/device_setter_test.py @@ -98,10 +98,10 @@ class GreedyLoadBalancingStrategyTest(test.TestCase): cluster=_CLUSTER_SPEC, ps_strategy=device_setter_lib.GreedyLoadBalancingStrategy( 2, device_setter_lib.byte_size_load_fn))): - u = variables.Variable(array_ops.zeros([2, 2])) - v = variables.Variable(array_ops.zeros([2, 1])) - w = variables.Variable(array_ops.zeros([2, 2])) - x = variables.Variable(array_ops.zeros([1, 3])) + u = variables.VariableV1(array_ops.zeros([2, 2])) + v = variables.VariableV1(array_ops.zeros([2, 1])) + w = variables.VariableV1(array_ops.zeros([2, 2])) + x = variables.VariableV1(array_ops.zeros([1, 3])) a = v + w self.assertDeviceEqual("/job:ps/task:0", u.device) self.assertDeviceEqual("/job:ps/task:0", u.initializer.device) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index f576435136..5c0c405306 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1022,7 +1022,7 @@ class SessionTest(test_util.TensorFlowTestCase): with session.Session(): a = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(2.0, shape=[1, 2], name='b') - v = variables.Variable(a, a.dtype) + v = variables.VariableV1(a, a.dtype) assign_a_to_v = state_ops.assign(v, a) assign_a_to_v.eval() diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py index 4630bda590..f197a9e4dc 100644 --- a/tensorflow/python/debug/cli/analyzer_cli_test.py +++ b/tensorflow/python/debug/cli/analyzer_cli_test.py @@ -599,11 +599,11 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): v_name = "simple_mul_add/v" u_init = constant_op.constant(u_init_val, shape=[2, 2], name="u_init") - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) cls._u_line_number = line_number_above() v_init = constant_op.constant(v_init_val, shape=[2, 1], 
name="v_init") - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) cls._v_line_number = line_number_above() w = math_ops.matmul(u, v, name="simple_mul_add/matmul") @@ -612,7 +612,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): x = math_ops.add(w, w, name="simple_mul_add/add") cls._x_line_number = line_number_above() - a = variables.Variable([1, 3, 3, 7], name="a") + a = variables.VariableV1([1, 3, 3, 7], name="a") u.initializer.run() v.initializer.run() @@ -1371,7 +1371,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): # Verify the annotation of the line that creates u. index = self._findSourceLine(out, self._u_line_number) self.assertEqual( - ["L%d u = variables.Variable(u_init, name=u_name)" % + ["L%d u = variables.VariableV1(u_init, name=u_name)" % self._u_line_number, " simple_mul_add/u", " simple_mul_add/u/Assign", @@ -1388,7 +1388,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): # Verify the annotation of the line that creates v. index = self._findSourceLine(out, self._v_line_number) self.assertEqual( - ["L%d v = variables.Variable(v_init, name=v_name)" % + ["L%d v = variables.VariableV1(v_init, name=v_name)" % self._v_line_number, " simple_mul_add/v"], out.lines[index : index + 2]) @@ -1425,7 +1425,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): # Verify the annotation of the line that creates u. 
index = self._findSourceLine(out, self._u_line_number) self.assertEqual( - ["L%d u = variables.Variable(u_init, name=u_name)" % + ["L%d u = variables.VariableV1(u_init, name=u_name)" % self._u_line_number, " simple_mul_add/u/read:0", " simple_mul_add/u:0"], @@ -1447,7 +1447,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): index = self._findSourceLine(out, self._u_line_number) self.assertEqual( - ["L%d u = variables.Variable(u_init, name=u_name)" % + ["L%d u = variables.VariableV1(u_init, name=u_name)" % self._u_line_number, " simple_mul_add/u", " simple_mul_add/u/Assign", @@ -1470,7 +1470,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): index = self._findSourceLine(out, self._u_line_number) self.assertEqual( - ["L%d u = variables.Variable(u_init, name=u_name)" % + ["L%d u = variables.VariableV1(u_init, name=u_name)" % self._u_line_number, " simple_mul_add/u", " (... Omitted 2 of 3 op(s) ...) +5"], @@ -1580,7 +1580,7 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): """List an input tree containing tensors from non-:0 output slot.""" with session.Session(config=no_rewrite_session_config()) as sess: - x = variables.Variable([1, 3, 3, 7], name="x") + x = variables.VariableV1([1, 3, 3, 7], name="x") _, idx = array_ops.unique(x, name="x_unique") idx_times_two = math_ops.multiply(idx, 2, name="idx_times_two") sess.run(x.initializer) @@ -1684,7 +1684,7 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase): with session.Session(config=no_rewrite_session_config()) as sess: x_init_val = np.array([5.0, 3.0]) x_init = constant_op.constant(x_init_val, shape=[2]) - x = variables.Variable(x_init, name="control_deps/x") + x = variables.VariableV1(x_init, name="control_deps/x") y = math_ops.add(x, x, name="control_deps/y") y = control_flow_ops.with_dependencies( diff --git a/tensorflow/python/debug/cli/stepper_cli_test.py b/tensorflow/python/debug/cli/stepper_cli_test.py index ee8cabca0d..7b8a42c253 100644 --- 
a/tensorflow/python/debug/cli/stepper_cli_test.py +++ b/tensorflow/python/debug/cli/stepper_cli_test.py @@ -132,8 +132,8 @@ def _parse_updated(lines): class NodeStepperSimpleGraphTest(test_util.TensorFlowTestCase): def setUp(self): - self.a = variables.Variable(10.0, name="a") - self.b = variables.Variable(20.0, name="b") + self.a = variables.VariableV1(10.0, name="a") + self.b = variables.VariableV1(20.0, name="b") self.c = math_ops.add(self.a, self.b, name="c") # Should be 30.0. self.d = math_ops.subtract(self.a, self.c, name="d") # Should be -20.0. diff --git a/tensorflow/python/debug/lib/debug_utils_test.py b/tensorflow/python/debug/lib/debug_utils_test.py index 5b1875e092..23ab98444c 100644 --- a/tensorflow/python/debug/lib/debug_utils_test.py +++ b/tensorflow/python/debug/lib/debug_utils_test.py @@ -46,8 +46,8 @@ class DebugUtilsTest(test_util.TensorFlowTestCase): cls._b_init = constant_op.constant( cls._b_init_val, shape=[2, 1], name="b_init") - cls._a = variables.Variable(cls._a_init, name="a1") - cls._b = variables.Variable(cls._b_init, name="b") + cls._a = variables.VariableV1(cls._a_init, name="a1") + cls._b = variables.VariableV1(cls._b_init, name="b") cls._c = constant_op.constant(cls._c_val, shape=[2, 1], name="c") # Matrix product of a and b. 
diff --git a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py index 46a7be5808..74498c8ea3 100644 --- a/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py +++ b/tensorflow/python/debug/lib/dist_session_debug_grpc_test.py @@ -118,8 +118,8 @@ class DistributedSessionDebugTest(test_util.TensorFlowTestCase): """ with ops.Graph().as_default() as graph: with ops.device("/job:worker/task:0/cpu:0"): - self.a = variables.Variable(10.0, name="a") - self.b = variables.Variable(100.0, name="b") + self.a = variables.VariableV1(10.0, name="a") + self.b = variables.VariableV1(100.0, name="b") self.inc_a = state_ops.assign_add(self.a, 2.0, name="inc_a") self.dec_b = state_ops.assign_add(self.b, -5.0, name="dec_b") self.p = math_ops.multiply(self.inc_a, self.dec_b, name="p") diff --git a/tensorflow/python/debug/lib/grpc_large_data_test.py b/tensorflow/python/debug/lib/grpc_large_data_test.py index 5bc477a9ba..ccc21bcf94 100644 --- a/tensorflow/python/debug/lib/grpc_large_data_test.py +++ b/tensorflow/python/debug/lib/grpc_large_data_test.py @@ -61,7 +61,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): with self.test_session( use_gpu=True, config=session_debug_testlib.no_rewrite_session_config()) as sess: - u = variables.Variable(42.0, name="original_u") + u = variables.VariableV1(42.0, name="original_u") for _ in xrange(50 * 1000): u = array_ops.identity(u) sess.run(variables.global_variables_initializer()) @@ -94,7 +94,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): u_init = constant_op.constant( u_init_val_array, dtype=dtypes.float32, name="u_init") - u = variables.Variable(u_init, name="u") + u = variables.VariableV1(u_init, name="u") def watch_fn(fetches, feeds): del fetches, feeds # Unused by this watch_fn. 
@@ -117,7 +117,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): b"", b"spam", b"A" * 2500 * 1024, b"B" * 2500 * 1024, b"egg", b""] u_init = constant_op.constant( u_init_val, dtype=dtypes.string, name="u_init") - u = variables.Variable(u_init, name="u") + u = variables.VariableV1(u_init, name="u") def watch_fn(fetches, feeds): del fetches, feeds @@ -146,7 +146,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): u_init = constant_op.constant( u_init_val_array, dtype=dtypes.string, name="u_init") - u = variables.Variable(u_init, name="u") + u = variables.VariableV1(u_init, name="u") def watch_fn(fetches, feeds): del fetches, feeds @@ -167,7 +167,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): config=session_debug_testlib.no_rewrite_session_config()) as sess: u_init = constant_op.constant( [], dtype=dtypes.float32, shape=[0], name="u_init") - u = variables.Variable(u_init, name="u") + u = variables.VariableV1(u_init, name="u") def watch_fn(fetches, feeds): del fetches, feeds @@ -189,7 +189,7 @@ class LargeGraphAndLargeTensorsDebugTest(test_util.TensorFlowTestCase): config=session_debug_testlib.no_rewrite_session_config()) as sess: u_init = constant_op.constant( [], dtype=dtypes.string, shape=[0], name="u_init") - u = variables.Variable(u_init, name="u") + u = variables.VariableV1(u_init, name="u") def watch_fn(fetches, feeds): del fetches, feeds diff --git a/tensorflow/python/debug/lib/session_debug_file_test.py b/tensorflow/python/debug/lib/session_debug_file_test.py index ba0f15b4e2..1874160dd6 100644 --- a/tensorflow/python/debug/lib/session_debug_file_test.py +++ b/tensorflow/python/debug/lib/session_debug_file_test.py @@ -58,9 +58,9 @@ class SessionDebugFileTest(session_debug_testlib.SessionDebugTestBase): v_name = "diff_Watch/v" u_init = constant_op.constant(u_init_val, shape=[2, 2]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v_init 
= constant_op.constant(v_init_val, shape=[2, 1]) - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) w = math_ops.matmul(u, v, name="diff_Watch/matmul") diff --git a/tensorflow/python/debug/lib/session_debug_grpc_test.py b/tensorflow/python/debug/lib/session_debug_grpc_test.py index 91f21cb1f3..bfc9a3a382 100644 --- a/tensorflow/python/debug/lib/session_debug_grpc_test.py +++ b/tensorflow/python/debug/lib/session_debug_grpc_test.py @@ -148,8 +148,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase): sess, "localhost:%d" % self._server_port, watch_fn="foo") def testGrpcDebugWrapperSessionWithoutWatchFnWorks(self): - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess = session.Session( @@ -175,8 +175,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase): del feeds, fetch_keys return ["DebugIdentity", "DebugNumericSummary"], r".*/read", None - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess = session.Session( @@ -209,8 +209,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase): op_type_regex_whitelist=None, tolerate_debug_op_creation_failures=True) - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess = session.Session( @@ -241,8 +241,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase): 14, len(dump.get_tensors("v/read", 0, "DebugNumericSummary")[0])) def testTensorBoardDebugHookWorks(self): - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = 
variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess = session.Session( @@ -286,8 +286,8 @@ class SessionDebugGrpcTest(session_debug_testlib.SessionDebugTestBase): self._server.query_source_file_line(__file__, 1) def testTensorBoardDebugHookDisablingTracebackSourceCodeSendingWorks(self): - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess = session.Session( @@ -381,8 +381,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenDebugNodes(self): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v_1 = variables.Variable(50.0, name="v_1") - v_2 = variables.Variable(-50.0, name="v_1") + v_1 = variables.VariableV1(50.0, name="v_1") + v_2 = variables.VariableV1(-50.0, name="v_1") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") @@ -451,8 +451,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v_1 = variables.Variable(50.0, name="v_1") - v_2 = variables.Variable(-50.0, name="v_1") + v_1 = variables.VariableV1(50.0, name="v_1") + v_2 = variables.VariableV1(-50.0, name="v_1") # These two nodes have names that match those in the # toggle_watch_on_core_metadata argument used when calling # start_server_on_separate_thread(). 
@@ -491,7 +491,7 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testToggleEnableTwoDebugWatchesNoCrosstalkBetweenServers(self): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v = variables.Variable(50.0, name="v") + v = variables.VariableV1(50.0, name="v") delta = constant_op.constant(5.0, name="delta") inc_v = state_ops.assign_add(v, delta, name="inc_v") @@ -534,8 +534,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testToggleBreakpointsWorks(self): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v_1 = variables.Variable(50.0, name="v_1") - v_2 = variables.Variable(-50.0, name="v_2") + v_1 = variables.VariableV1(50.0, name="v_1") + v_2 = variables.VariableV1(-50.0, name="v_2") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") @@ -592,8 +592,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testTensorBoardDebuggerWrapperToggleBreakpointsWorks(self): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v_1 = variables.Variable(50.0, name="v_1") - v_2 = variables.Variable(-50.0, name="v_2") + v_1 = variables.VariableV1(50.0, name="v_1") + v_2 = variables.VariableV1(-50.0, name="v_2") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") @@ -665,8 +665,8 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testTensorBoardDebuggerWrapperDisablingTracebackSourceSendingWorks(self): with session.Session( config=session_debug_testlib.no_rewrite_session_config()) as sess: - v_1 = variables.Variable(50.0, name="v_1") - v_2 = variables.Variable(-50.0, name="v_2") + v_1 = variables.VariableV1(50.0, name="v_1") + v_2 = 
variables.VariableV1(-50.0, name="v_2") delta_1 = constant_op.constant(5.0, name="delta_1") delta_2 = constant_op.constant(-5.0, name="delta_2") inc_v_1 = state_ops.assign_add(v_1, delta_1, name="inc_v_1") @@ -699,7 +699,7 @@ class SessionDebugGrpcGatingTest(test_util.TensorFlowTestCase): def testGetGrpcDebugWatchesReturnsCorrectAnswer(self): with session.Session() as sess: - v = variables.Variable(50.0, name="v") + v = variables.VariableV1(50.0, name="v") delta = constant_op.constant(5.0, name="delta") inc_v = state_ops.assign_add(v, delta, name="inc_v") @@ -743,7 +743,7 @@ class DelayedDebugServerTest(test_util.TensorFlowTestCase): with self.cached_session() as sess: a_init = constant_op.constant(42.0, name="a_init") - a = variables.Variable(a_init, name="a") + a = variables.VariableV1(a_init, name="a") def watch_fn(fetches, feeds): del fetches, feeds diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py index 070d9c4cd7..25ef91b575 100644 --- a/tensorflow/python/debug/lib/session_debug_testlib.py +++ b/tensorflow/python/debug/lib/session_debug_testlib.py @@ -70,7 +70,7 @@ class _RNNCellForTest(rnn_cell_impl.RNNCell): def __init__(self, input_output_size, state_size): self._input_output_size = input_output_size self._state_size = state_size - self._w = variables.Variable(1.0, dtype=dtypes.float32, name="w") + self._w = variables.VariableV1(1.0, dtype=dtypes.float32, name="w") @property def output_size(self): @@ -182,9 +182,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): w_name = "w" u_init = constant_op.constant(u_init_val, shape=[2, 2]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v_init = constant_op.constant(v_init_val, shape=[2, 1]) - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) w = math_ops.matmul(u, v, name=w_name) @@ -221,8 +221,8 @@ class 
SessionDebugTestBase(test_util.TensorFlowTestCase): def testCopyNodesHaveCorrectDebugOpsAndURLsAttributeValues(self): with session.Session() as sess: - u = variables.Variable(2.1, name="u") - v = variables.Variable(20.0, name="v") + u = variables.VariableV1(2.1, name="u") + v = variables.VariableV1(20.0, name="v") w = math_ops.multiply(u, v, name="w") sess.run(variables.global_variables_initializer()) @@ -324,8 +324,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): str1_name = "str1" str2_name = "str2" - str1 = variables.Variable(str1_init, name=str1_name) - str2 = variables.Variable(str2_init, name=str2_name) + str1 = variables.VariableV1(str1_init, name=str1_name) + str2 = variables.VariableV1(str2_init, name=str2_name) # Concatenate str1 and str2 str_concat = math_ops.add(str1, str2, name="str_concat") @@ -387,9 +387,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): s_name = "%s/s" % op_namespace u_init = constant_op.constant(u_init_val, shape=[2, 2]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) s_init = constant_op.constant(s_init_val) - s = variables.Variable(s_init, name=s_name) + s = variables.VariableV1(s_init, name=s_name) run_options = config_pb2.RunOptions(output_partition_graphs=True) debug_urls = self._debug_urls() @@ -439,7 +439,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): u_init_val = np.array(11.0) u_init = constant_op.constant(u_init_val) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) # "v" is the increment. 
v_name = "testDumpToFileWhileLoop/v" @@ -447,7 +447,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): v_init_val = np.array(2.0) v_init = constant_op.constant(v_init_val) - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) u.initializer.run() v.initializer.run() @@ -605,8 +605,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugCondWatchingWholeGraphWorks(self): with session.Session() as sess: - x = variables.Variable(10.0, name="x") - y = variables.Variable(20.0, name="y") + x = variables.VariableV1(10.0, name="x") + y = variables.VariableV1(20.0, name="y") cond = control_flow_ops.cond( x > y, lambda: math_ops.add(x, 1), lambda: math_ops.add(y, 1)) @@ -628,9 +628,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): z_name = "testFindNodesWithBadTensorValues/z" u_init = constant_op.constant([2.0, 4.0]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v_init = constant_op.constant([2.0, 1.0]) - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) # Expected output: [0.0, 3.0] w = math_ops.subtract(u, v, name=w_name) @@ -679,9 +679,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): z_name = "testFindInfOrNanWithOpNameExclusion/z" u_init = constant_op.constant([2.0, 4.0]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v_init = constant_op.constant([2.0, 1.0]) - v = variables.Variable(v_init, name=v_name) + v = variables.VariableV1(v_init, name=v_name) # Expected output: [0.0, 3.0] w = math_ops.subtract(u, v, name=w_name) @@ -725,7 +725,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): w_name = "testDumpGraphStructureLookup/w" u_init = constant_op.constant([2.0, 4.0]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, 
name=w_name) @@ -859,9 +859,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testGraphPathFindingOnControlEdgesWorks(self): with session.Session(config=no_rewrite_session_config()) as sess: - v1 = variables.Variable(1.0, name="v1") - v2 = variables.Variable(2.0, name="v2") - v3 = variables.Variable(3.0, name="v3") + v1 = variables.VariableV1(1.0, name="v1") + v2 = variables.VariableV1(2.0, name="v2") + v3 = variables.VariableV1(3.0, name="v3") a = math_ops.add(v1, v2, name="a") with ops.control_dependencies([a]): c = math_ops.subtract(v3, v3, name="c") @@ -875,8 +875,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testGraphPathFindingReverseRefEdgeWorks(self): with session.Session(config=no_rewrite_session_config()) as sess: - v = variables.Variable(10.0, name="v") - delta = variables.Variable(1.0, name="delta") + v = variables.VariableV1(10.0, name="v") + delta = variables.VariableV1(1.0, name="delta") inc_v = state_ops.assign_add(v, delta, name="inc_v") sess.run(variables.global_variables_initializer()) @@ -894,7 +894,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): w_name = "testDumpCausalityCheck/w" u_init = constant_op.constant([2.0, 4.0]) - u = variables.Variable(u_init, name=u_name) + u = variables.VariableV1(u_init, name=u_name) v = math_ops.add(u, u, name=v_name) w = math_ops.add(v, v, name=w_name) @@ -980,7 +980,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): w_name = "oneOfTwoSlots/w" y_name = "oneOfTwoSlots/y" - x = variables.Variable([1, 3, 3, 7], dtype=dtypes.int32, name=x_name) + x = variables.VariableV1([1, 3, 3, 7], dtype=dtypes.int32, name=x_name) sess.run(x.initializer) unique_x, indices, _ = array_ops.unique_with_counts(x, name=u_name) @@ -1039,9 +1039,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): with session.Session(config=no_rewrite_session_config()) as sess: u_init = constant_op.constant(10.0) - u = variables.Variable(u_init, name="gdo/u") + u = 
variables.VariableV1(u_init, name="gdo/u") v_init = constant_op.constant(20.0) - v = variables.Variable(v_init, name="gdo/v") + v = variables.VariableV1(v_init, name="gdo/v") w = math_ops.multiply(u, v, name="gdo/w") # gdo stands for GradientDescentOptimizer. @@ -1085,7 +1085,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): with session.Session() as sess: x_init = constant_op.constant([2, 2, 3, 5, 5]) - x = variables.Variable(x_init, name="unconnected/x") + x = variables.VariableV1(x_init, name="unconnected/x") # The UniqueOp (tf.unique) has two output slots. Use only slot 0 in the # graph. Let the debugger watch the unused slot 1. @@ -1225,14 +1225,14 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryOnInitializedTensorGivesCorrectResult(self): with session.Session(config=no_rewrite_session_config()) as sess: - a = variables.Variable( + a = variables.VariableV1( [ np.nan, np.nan, 0.0, 0.0, 0.0, -1.0, -3.0, 3.0, 7.0, -np.inf, -np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.nan, np.nan ], dtype=np.float32, name="numeric_summary/a") - b = variables.Variable( + b = variables.VariableV1( [0.0] * 18, dtype=np.float32, name="numeric_summary/b") c = math_ops.add(a, b, name="numeric_summary/c") @@ -1249,7 +1249,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryOnUninitializedTensorGivesCorrectResult(self): with session.Session() as sess: - a = variables.Variable( + a = variables.VariableV1( [42], dtype=np.float32, name="numeric_summary_uninit/a") _, dump = self._debug_run_and_get_dump( @@ -1275,9 +1275,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryFailureIsToleratedWhenOrdered(self): with session.Session() as sess: - a = variables.Variable("1", name="a") - b = variables.Variable("3", name="b") - c = variables.Variable("2", name="c") + a = variables.VariableV1("1", name="a") + b = variables.VariableV1("3", name="b") + c = 
variables.VariableV1("2", name="c") d = math_ops.add(a, b, name="d") e = math_ops.add(d, c, name="e") @@ -1313,9 +1313,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryInvalidAttributesStringAreCaught(self): with session.Session(config=no_rewrite_session_config()) as sess: - a = variables.Variable(10.0, name="a") - b = variables.Variable(0.0, name="b") - c = variables.Variable(0.0, name="c") + a = variables.VariableV1(10.0, name="a") + b = variables.VariableV1(0.0, name="b") + c = variables.VariableV1(0.0, name="c") x = math_ops.divide(a, b, name="x") y = math_ops.multiply(x, c, name="y") @@ -1361,9 +1361,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryMuteOnHealthyMutesOnlyHealthyTensorDumps(self): with session.Session(config=no_rewrite_session_config()) as sess: - a = variables.Variable(10.0, name="a") - b = variables.Variable(0.0, name="b") - c = variables.Variable(0.0, name="c") + a = variables.VariableV1(10.0, name="a") + b = variables.VariableV1(0.0, name="b") + c = variables.VariableV1(0.0, name="c") x = math_ops.divide(a, b, name="x") y = math_ops.multiply(x, c, name="y") @@ -1396,8 +1396,8 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testDebugNumericSummaryMuteOnHealthyAndCustomBoundsWork(self): with session.Session() as sess: - a = variables.Variable([10.0, 10.0], name="a") - b = variables.Variable([10.0, 2.0], name="b") + a = variables.VariableV1([10.0, 10.0], name="a") + b = variables.VariableV1([10.0, 2.0], name="b") x = math_ops.add(a, b, name="x") # [20.0, 12.0] y = math_ops.divide(x, b, name="y") # [2.0, 6.0] @@ -1436,9 +1436,9 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): def testLookUpNodePythonTracebackWorks(self): with session.Session() as sess: u_init = constant_op.constant(10.0) - u = variables.Variable(u_init, name="traceback/u") + u = variables.VariableV1(u_init, name="traceback/u") v_init = constant_op.constant(20.0) - v = 
variables.Variable(v_init, name="traceback/v") + v = variables.VariableV1(v_init, name="traceback/v") w = math_ops.multiply(u, v, name="traceback/w") @@ -1487,7 +1487,7 @@ class DebugConcurrentRunCallsTest(test_util.TensorFlowTestCase): self.skipTest("No testing concurrent runs on a single GPU.") with session.Session() as sess: - v = variables.Variable(30.0, name="v") + v = variables.VariableV1(30.0, name="v") constants = [] for i in xrange(self._num_concurrent_runs): constants.append(constant_op.constant(1.0, name="c%d" % i)) diff --git a/tensorflow/python/debug/lib/stepper_test.py b/tensorflow/python/debug/lib/stepper_test.py index 9a3d0efabf..3839c67198 100644 --- a/tensorflow/python/debug/lib/stepper_test.py +++ b/tensorflow/python/debug/lib/stepper_test.py @@ -36,8 +36,8 @@ from tensorflow.python.training import gradient_descent class StepperTest(test_util.TensorFlowTestCase): def setUp(self): - self.a = variables.Variable(2.0, name="a") - self.b = variables.Variable(3.0, name="b") + self.a = variables.VariableV1(2.0, name="a") + self.b = variables.VariableV1(3.0, name="b") self.c = math_ops.multiply(self.a, self.b, name="c") # Should be 6.0. self.d = math_ops.multiply(self.a, self.a, name="d") # Should be 4.0. @@ -49,7 +49,7 @@ class StepperTest(test_util.TensorFlowTestCase): # The there nodes x, y and z form a graph with "cross-links" in. I.e., x # and y are both direct inputs to z, but x is also a direct input to y. - self.x = variables.Variable(2.0, name="x") # Should be 2.0 + self.x = variables.VariableV1(2.0, name="x") # Should be 2.0 self.y = math_ops.negative(self.x, name="y") # Should be -2.0. self.z = math_ops.multiply(self.x, self.y, name="z") # Should be -4.0. 
@@ -580,7 +580,7 @@ class StepperTestWithPlaceHolders(test_util.TensorFlowTestCase): class StepperAssignAddTest(test_util.TensorFlowTestCase): def setUp(self): - self.v = variables.Variable(10.0, name="v") + self.v = variables.VariableV1(10.0, name="v") self.p = math_ops.add(self.v, self.v, name="p") self.q = math_ops.multiply(self.p, self.p, name="q") self.delta = constant_op.constant(2.0, name="delta") @@ -711,9 +711,9 @@ class StepperBackwardRunTest(test_util.TensorFlowTestCase): Construct a backward graph using the GradientDescentOptimizer. """ - self.a = variables.Variable(1.0, name="a") - self.b = variables.Variable(2.0, name="b") - self.c = variables.Variable(4.0, name="c") + self.a = variables.VariableV1(1.0, name="a") + self.b = variables.VariableV1(2.0, name="b") + self.c = variables.VariableV1(4.0, name="c") self.d = math_ops.multiply(self.a, self.b, name="d") self.e = math_ops.multiply(self.b, self.c, name="e") self.f = math_ops.multiply(self.d, self.e, name="f") diff --git a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py index 254201c393..11011a5c13 100644 --- a/tensorflow/python/debug/wrappers/dumping_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/dumping_wrapper_test.py @@ -46,7 +46,7 @@ class DumpingDebugWrapperSessionTest(test_util.TensorFlowTestCase): def setUp(self): self.session_root = tempfile.mkdtemp() - self.v = variables.Variable(10.0, dtype=dtypes.float32, name="v") + self.v = variables.VariableV1(10.0, dtype=dtypes.float32, name="v") self.delta = constant_op.constant(1.0, dtype=dtypes.float32, name="delta") self.eta = constant_op.constant(-1.4, dtype=dtypes.float32, name="eta") self.inc_v = state_ops.assign_add(self.v, self.delta, name="inc_v") diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py index 05c9eaa4d2..149a7497df 100644 --- 
a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py @@ -132,8 +132,8 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): def setUp(self): self._tmp_dir = tempfile.mktemp() - self.v = variables.Variable(10.0, name="v") - self.w = variables.Variable(21.0, name="w") + self.v = variables.VariableV1(10.0, name="v") + self.w = variables.VariableV1(21.0, name="w") self.delta = constant_op.constant(1.0, name="delta") self.inc_v = state_ops.assign_add(self.v, self.delta, name="inc_v") @@ -358,7 +358,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): def testDebuggingMakeCallableTensorRunnerWorks(self): wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( [["run"], ["run"]], self.sess, dump_root=self._tmp_dir) - v = variables.Variable(42) + v = variables.VariableV1(42) tensor_runner = wrapped_sess.make_callable(v) self.sess.run(v.initializer) @@ -382,7 +382,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): def testDebuggingMakeCallableOperationRunnerWorks(self): wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( [["run"], ["run"]], self.sess, dump_root=self._tmp_dir) - v = variables.Variable(10.0) + v = variables.VariableV1(10.0) inc_v = state_ops.assign_add(v, 1.0) op_runner = wrapped_sess.make_callable(inc_v.op) self.sess.run(v.initializer) @@ -403,7 +403,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertEqual(1, len(wrapped_sess.observers["debug_dumps"])) def testDebuggingMakeCallableFromOptionsWithZeroFeedWorks(self): - variable_1 = variables.Variable( + variable_1 = variables.VariableV1( 10.5, dtype=dtypes.float32, name="variable_1") a = math_ops.add(variable_1, variable_1, "callable_a") math_ops.add(a, a, "callable_b") @@ -480,7 +480,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): self.assertItemsEqual(["callable_a", "callable_b"], node_names) def 
testDebugMakeCallableFromOptionsWithCustomOptionsAndMetadataWorks(self): - variable_1 = variables.Variable( + variable_1 = variables.VariableV1( 10.5, dtype=dtypes.float32, name="variable_1") a = math_ops.add(variable_1, variable_1, "callable_a") math_ops.add(a, a, "callable_b") @@ -528,7 +528,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): def testRuntimeErrorBeforeGraphExecutionIsRaised(self): # Use an impossible device name to cause an error before graph execution. with ops.device("/device:GPU:1337"): - w = variables.Variable([1.0] * 10, name="w") + w = variables.VariableV1([1.0] * 10, name="w") wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( [["run"]], self.sess, dump_root=self._tmp_dir) diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 1ed5e30b0e..bc2504ca19 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -1017,7 +1017,7 @@ class EstimatorGetVariablesTest(test.TestCase): def _model_fn(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='one') + variables.VariableV1(1., name='one') return model_fn_lib.EstimatorSpec( mode=mode, loss=constant_op.constant(0.), @@ -1033,8 +1033,8 @@ class EstimatorGetVariablesTest(test.TestCase): def _model_fn(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='one') - variables.Variable(3., name='three') + variables.VariableV1(1., name='one') + variables.VariableV1(3., name='three') return model_fn_lib.EstimatorSpec( mode=mode, loss=constant_op.constant(0.), @@ -1178,13 +1178,13 @@ class EstimatorEvaluateTest(test.TestCase): def _model_fn(features, labels, mode, params): del features, labels, params mean = metrics_module.Mean() - mean.update_state(variables.Variable(2.) + 1) + mean.update_state(variables.VariableV1(2.) 
+ 1) return model_fn_lib.EstimatorSpec( mode, loss=constant_op.constant(1.), eval_metric_ops={ 'mean1': mean, - 'mean2': metrics_lib.mean(variables.Variable(2.) + 1) + 'mean2': metrics_lib.mean(variables.VariableV1(2.) + 1) }) est = estimator.Estimator(model_fn=_model_fn) @@ -1332,7 +1332,7 @@ class EstimatorEvaluateTest(test.TestCase): def _model_fn_with_incremental_loss(features, labels, mode): _, _ = features, labels - local_weight = variables.Variable( + local_weight = variables.VariableV1( 0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES]) # Loss will be 2, 4, 6, ... loss = 2 * state_ops.assign_add(local_weight, 1.) @@ -1385,7 +1385,7 @@ class EstimatorEvaluateTest(test.TestCase): def _get_model_fn(val=1): def _model_fn(features, labels, mode): del features, labels # unused - variables.Variable(val, name='weight') + variables.VariableV1(val, name='weight') return model_fn_lib.EstimatorSpec( mode=mode, predictions=constant_op.constant([[1.]]), @@ -1409,7 +1409,7 @@ class EstimatorEvaluateTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') self.mock_saver = get_mock_saver() return model_fn_lib.EstimatorSpec( mode=mode, @@ -1603,7 +1603,7 @@ class EstimatorPredictTest(test.TestCase): def test_no_checkpoint_uses_init(self): def _model_fn(features, labels, mode, params, config): del features, labels, params, config - x = variables.Variable([[3.]], name='x') + x = variables.VariableV1([[3.]], name='x') return model_fn_lib.EstimatorSpec(mode, predictions=math_ops.add(x, 1.)) est = estimator.Estimator(model_fn=_model_fn) # Expected prediction value is 1 + the value of the Variable that is newly @@ -1614,7 +1614,7 @@ class EstimatorPredictTest(test.TestCase): def _make_model_fn(x): def _variable_creating_and_export_model_fn(features, labels, mode): _, _ = features, labels - x_var = variables.Variable([[x]], name='x') + x_var = 
variables.VariableV1([[x]], name='x') return model_fn_lib.EstimatorSpec( mode, predictions=math_ops.add(x_var, 1.), @@ -1936,7 +1936,7 @@ class EstimatorPredictTest(test.TestCase): def _model_fn(features, labels, mode): _, _ = features, labels - v = variables.Variable([[16.]], name='weight') + v = variables.VariableV1([[16.]], name='weight') prediction = v * 2 return model_fn_lib.EstimatorSpec( mode, @@ -1953,7 +1953,7 @@ class EstimatorPredictTest(test.TestCase): def _model_fn(features, labels, mode): _, _ = features, labels - v = variables.Variable([[16.]], name='weight') + v = variables.VariableV1([[16.]], name='weight') prediction = v * 2 return model_fn_lib.EstimatorSpec( mode, @@ -1974,7 +1974,7 @@ class EstimatorPredictTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') self.mock_saver = get_mock_saver() return model_fn_lib.EstimatorSpec( mode=mode, @@ -2029,7 +2029,7 @@ class EstimatorPredictTest(test.TestCase): def _model_fn_for_export_tests(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') scores = constant_op.constant([3.]) classes = constant_op.constant(['wumpus']) update_global_step = state_ops.assign_add(training.get_global_step(), 1) @@ -2052,11 +2052,11 @@ def _x_y_input_fn(): def _model_fn_with_x_y(features, labels, mode): _ = labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') scores = constant_op.constant([3.]) classes = constant_op.constant(['wumpus']) if mode == model_fn_lib.ModeKeys.PREDICT: - variables.Variable(36., name='name_collision') + variables.VariableV1(36., name='name_collision') return model_fn_lib.EstimatorSpec( mode, predictions=constant_op.constant(10.), @@ -2076,8 +2076,8 @@ def _model_fn_with_x_y(features, labels, mode): metrics_lib.mean( features['x'] - features['y'], 
name='{}mean'.format(prefix)) } - variables.Variable(1., name='later_var') - variables.Variable(3., name='name_collision') + variables.VariableV1(1., name='later_var') + variables.VariableV1(3., name='name_collision') return model_fn_lib.EstimatorSpec( mode, predictions=multiplied, @@ -2411,9 +2411,9 @@ class EstimatorExportTest(test.TestCase): def _model_fn_with_predict_only_vars(features, labels, mode): _, _ = features, labels if mode == model_fn_lib.ModeKeys.PREDICT: - variables.Variable(1., name='only_in_predict') + variables.VariableV1(1., name='only_in_predict') else: - variables.Variable(1., name='otherwise') + variables.VariableV1(1., name='otherwise') prediction = constant_op.constant(1.) return model_fn_lib.EstimatorSpec( @@ -2684,7 +2684,7 @@ class EstimatorExportTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') self.mock_saver = get_mock_saver() scores = constant_op.constant([3.]) return model_fn_lib.EstimatorSpec( @@ -2717,7 +2717,7 @@ class EstimatorExportTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') scores = constant_op.constant([3.]) if mode == model_fn_lib.ModeKeys.PREDICT: @@ -2762,8 +2762,8 @@ class EstimatorExportTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - my_int = variables.Variable(1, name='my_int', - collections=[ops.GraphKeys.LOCAL_VARIABLES]) + my_int = variables.VariableV1(1, name='my_int', + collections=[ops.GraphKeys.LOCAL_VARIABLES]) _ = training.get_or_create_steps_per_run_variable() scores = constant_op.constant([3.]) with ops.control_dependencies([ @@ -2808,8 +2808,8 @@ class EstimatorExportTest(test.TestCase): def _model_fn_scaffold(features, labels, mode): _, _ = features, labels - my_int = variables.Variable(1, name='my_int', - 
collections=[ops.GraphKeys.LOCAL_VARIABLES]) + my_int = variables.VariableV1(1, name='my_int', + collections=[ops.GraphKeys.LOCAL_VARIABLES]) scores = constant_op.constant([3.]) with ops.control_dependencies([ variables.local_variables_initializer(), @@ -3038,7 +3038,7 @@ class EstimatorExportTest(test.TestCase): def _model_fn(features, labels, mode): _, _ = features, labels - variables.Variable(1., name='weight') + variables.VariableV1(1., name='weight') return model_fn_lib.EstimatorSpec( mode, predictions=constant_op.constant(10.), @@ -3081,7 +3081,7 @@ class EstimatorHookOrderingTest(test.TestCase): """A graph that generates NaN's for testing.""" del features, labels - global_step = variables.Variable( + global_step = variables.VariableV1( 0, dtype=dtypes.int64, name='global_step') inc_global_step = state_ops.assign_add(global_step, 1) nan_const = constant_op.constant(np.nan, dtype=dtypes.float32) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index f740e5cfaa..87f567db0e 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -113,7 +113,7 @@ class FunctionTest(test.TestCase): return a with ops.Graph().as_default(): - var = variables.Variable([18.0]) + var = variables.VariableV1([18.0]) call = MyIdentityFunc(var._ref()) # pylint: disable=protected-access self.assertEqual("MyIdentity", call.op.name) for cfg in _OptimizerOptions(): diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 2dafb94ba7..563a177dd0 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -104,13 +104,13 @@ class DeviceFunctionsTest(test.TestCase): def testNestedDeviceFunctions(self): with ops.Graph().as_default(): - var_0 = variables.Variable(0) + var_0 = variables.VariableV1(0) with ops.device(test_device_func_pin_variable_to_cpu): - var_1 = 
variables.Variable(1) + var_1 = variables.VariableV1(1) with ops.device(lambda op: "/device:GPU:0"): - var_2 = variables.Variable(2) + var_2 = variables.VariableV1(2) with ops.device("/device:GPU:0"): # Implicit merging device function. - var_3 = variables.Variable(3) + var_3 = variables.VariableV1(3) self.assertDeviceEqual(var_0.device, None) self.assertDeviceEqual(var_1.device, "/device:CPU:0") diff --git a/tensorflow/python/framework/subscribe_test.py b/tensorflow/python/framework/subscribe_test.py index 1d594e4078..cab426844d 100644 --- a/tensorflow/python/framework/subscribe_test.py +++ b/tensorflow/python/framework/subscribe_test.py @@ -212,8 +212,8 @@ class SubscribeTest(test_util.TensorFlowTestCase): def testSubscribeVariable(self): """Confirm that variables can be subscribed.""" - v1 = variables.Variable(0.0) - v2 = variables.Variable(4.0) + v1 = variables.VariableV1(0.0) + v2 = variables.VariableV1(4.0) add = math_ops.add(v1, v2) assign_v1 = v1.assign(3.0) diff --git a/tensorflow/python/grappler/item_test.py b/tensorflow/python/grappler/item_test.py index c40de9da0a..d3d96c646c 100644 --- a/tensorflow/python/grappler/item_test.py +++ b/tensorflow/python/grappler/item_test.py @@ -110,7 +110,7 @@ class ItemTest(test.TestCase): def testColocationContraints(self): with ops.Graph().as_default() as g: c = constant_op.constant([10]) - v = variables.Variable([3], dtype=dtypes.int32) + v = variables.VariableV1([3], dtype=dtypes.int32) i = gen_array_ops.ref_identity(v) a = state_ops.assign(i, c) train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index b658edff2d..03b42f6453 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -39,8 +39,8 @@ class MemoryOptimizerSwapTest(test.TestCase): def testNoSwapping(self): """Make sure the graph is preserved when there is nothing 
to swap.""" - a = variables.Variable(10, name='a') - b = variables.Variable(20, name='b') + a = variables.VariableV1(10, name='a') + b = variables.VariableV1(20, name='b') c = math_ops.add_n([a, b], name='c') d = math_ops.add_n([b, c], name='d') train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) @@ -60,8 +60,8 @@ class MemoryOptimizerSwapTest(test.TestCase): def testSimpleSwap(self): """Check that the swap annotations are followed.""" - a = variables.Variable(10, name='a') - b = variables.Variable(20, name='b') + a = variables.VariableV1(10, name='a') + b = variables.VariableV1(20, name='b') c = math_ops.add_n([a, b], name='c') d = math_ops.add_n([b, c], name='d') train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) @@ -244,7 +244,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase): init_op_name=init_op_name, train_op_name=train_op_name, loss_op_name=loss_op_name) - self.assertAllClose(original_loss, memory_optimized_loss, rtol=1e-4) + self.assertAllClose(original_loss, memory_optimized_loss, rtol=1e-2) def _annotated_graph(self): graph = ops.Graph() diff --git a/tensorflow/python/grappler/tf_optimizer_test.py b/tensorflow/python/grappler/tf_optimizer_test.py index 5a9afe7257..eca0f67982 100644 --- a/tensorflow/python/grappler/tf_optimizer_test.py +++ b/tensorflow/python/grappler/tf_optimizer_test.py @@ -57,7 +57,7 @@ class PyWrapOptimizeGraphTest(test.TestCase): def testKeepNodes(self): g = ops.Graph() with g.as_default(): - a1 = variables.Variable( + a1 = variables.VariableV1( 1.0) # Must be preserved since it's in the collection 'variables'. a2 = constant_op.constant(0, shape=[50, 50], name='keep') ops.add_to_collection('a2', a2) # Explicitly add to collection. 
diff --git a/tensorflow/python/keras/engine/base_layer.py b/tensorflow/python/keras/engine/base_layer.py index cb19a412a2..e98b131ae6 100644 --- a/tensorflow/python/keras/engine/base_layer.py +++ b/tensorflow/python/keras/engine/base_layer.py @@ -1972,7 +1972,9 @@ def make_variable(name, if use_resource is None: use_resource = True - v = tf_variables.Variable( + # TODO(apassos,rohanj) figure out how to remove collections from here so we + # can remove the V1. + v = tf_variables.VariableV1( initial_value=init_val, name=name, trainable=trainable, diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 2fe85839d0..c5547b19be 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1001,14 +1001,14 @@ class SliceAssignTest(test_util.TensorFlowTestCase): errors.FailedPreconditionError, "Attempting to use uninitialized value Variable"): with self.cached_session() as sess: - v = variables.Variable([1, 2]) + v = variables.VariableV1([1, 2]) sess.run(v[:].assign([1, 2])) def testTypeError(self): init_val = constant_op.constant([1, 2], dtype=dtypes.int32) too_small_val = constant_op.constant([3, 4], dtype=dtypes.int8) too_large_val = constant_op.constant([3, 4], dtype=dtypes.int64) - v = variables.Variable(init_val) + v = variables.VariableV1(init_val) with self.assertRaises(TypeError): v[:].assign(too_small_val) with self.assertRaises(TypeError): diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index fc4d2a3809..083de84775 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -130,7 +130,7 @@ class ControlFlowTest(test.TestCase): def testRefIdentity(self): with self.cached_session(): - v = variables.Variable(7) + v = variables.VariableV1(7) v = control_flow_ops._Identity(v) op = 
state_ops.assign(v, 9) @@ -142,7 +142,7 @@ class ControlFlowTest(test.TestCase): def testRefEnter(self): with self.cached_session(): - v = variables.Variable(7) + v = variables.VariableV1(7) enter_v = control_flow_ops._Enter(v, "foo_1", is_constant=True) nine = constant_op.constant(9) @@ -155,7 +155,7 @@ class ControlFlowTest(test.TestCase): def testRefSwitch(self): with self.cached_session(): - v = variables.Variable(7) + v = variables.VariableV1(7) p = constant_op.constant(True) v1 = control_flow_ops._SwitchRefOrTensor(v._ref(), p) # pylint: disable=protected-access @@ -796,7 +796,7 @@ class ControlFlowTest(test.TestCase): def testWhileWithRefs_1(self): with self.cached_session() as sess: - x = variables.Variable(0)._ref() # pylint: disable=protected-access + x = variables.VariableV1(0)._ref() # pylint: disable=protected-access i = constant_op.constant(0) c = lambda i, x: math_ops.less(i, 100) @@ -2317,7 +2317,7 @@ class ControlFlowTest(test.TestCase): def testWhileWithRefsWithGradients_1(self): with self.cached_session() as sess: - x = variables.Variable(0.)._ref() # pylint: disable=protected-access + x = variables.VariableV1(0.)._ref() # pylint: disable=protected-access i = constant_op.constant(0) c = lambda i, x: math_ops.less(i, 10) @@ -2329,7 +2329,7 @@ class ControlFlowTest(test.TestCase): r = control_flow_ops.while_loop(c, body, [i, x], parallel_iterations=5) - grad_ys = [variables.Variable(73)._ref()] # pylint: disable=protected-access + grad_ys = [variables.VariableV1(73)._ref()] # pylint: disable=protected-access grad = gradients_impl.gradients([r[1]], [x], grad_ys=grad_ys) variables.global_variables_initializer().run() @@ -2779,7 +2779,7 @@ class ControlFlowTest(test.TestCase): def testWithOpsDependencies(self): with self.cached_session() as sess: - v = variables.Variable(0.0) + v = variables.VariableV1(0.0) c = constant_op.constant(10) # Fetching v directly will result in an uninitialized error @@ -2802,7 +2802,7 @@ class 
ControlFlowTest(test.TestCase): def testWithTensorDependencies(self): with self.cached_session(): - v = variables.Variable(0.0) + v = variables.VariableV1(0.0) c1 = constant_op.constant(10) c2 = constant_op.constant(20) @@ -2828,7 +2828,7 @@ class ControlFlowTest(test.TestCase): def testWithIndexedSlicesDependencies(self): with self.cached_session(): - v = variables.Variable( + v = variables.VariableV1( np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype(np.float32)) v_at_1 = ops.IndexedSlices(v, constant_op.constant([1])) gather_v_at_1 = array_ops.gather(v_at_1.values, v_at_1.indices) @@ -2851,18 +2851,18 @@ class ControlFlowTest(test.TestCase): with ops.Graph().as_default(): # device set on tensor => same device on dep. with ops.device("/job:ps"): - vd = variables.Variable([0.0]) + vd = variables.VariableV1([0.0]) with_vd_dep = control_flow_ops.with_dependencies([vd.initializer], vd) self.assertTrue("/job:ps" in with_vd_dep.device) # No device set on tensor => no device on dep. - vnod = variables.Variable([0.0]) + vnod = variables.VariableV1([0.0]) with_vnod_dep = control_flow_ops.with_dependencies([vnod.initializer], vnod) self.assertDeviceEqual(None, with_vnod_dep.device) # device set on tensor, default device on graph => default device on dep. - vdef = variables.Variable([0.0], name="vdef") + vdef = variables.VariableV1([0.0], name="vdef") with ops.device("/job:worker/device:GPU:1"): with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer], vdef) @@ -2872,8 +2872,8 @@ class ControlFlowTest(test.TestCase): def testGroup(self): with self.cached_session() as sess: - v1 = variables.Variable([0.0]) - v2 = variables.Variable([1.0]) + v1 = variables.VariableV1([0.0]) + v2 = variables.VariableV1([1.0]) # Group init1 and init2 and run. 
init = control_flow_ops.group(v1.initializer, v2.initializer) @@ -2955,29 +2955,29 @@ class ControlFlowTest(test.TestCase): p1 = array_ops.placeholder(dtypes.float32) p2 = array_ops.placeholder(dtypes.float32) p3 = array_ops.placeholder(dtypes.float32) - v1 = variables.Variable(p1, validate_shape=False) - v2 = variables.Variable(p2, validate_shape=False) - v3 = variables.Variable(p3, validate_shape=False) + v1 = variables.VariableV1(p1, validate_shape=False) + v2 = variables.VariableV1(p2, validate_shape=False) + v3 = variables.VariableV1(p3, validate_shape=False) self.assertIs(None, v1.get_shape().ndims) s = control_flow_ops.ref_select(index, [v1, v2, v3]) self.assertIs(None, s.get_shape().ndims) # All inputs known but different. - v1 = variables.Variable([[1, 2]]) - v2 = variables.Variable([[2], [1]]) + v1 = variables.VariableV1([[1, 2]]) + v2 = variables.VariableV1([[2], [1]]) s = control_flow_ops.ref_select(index, [v1, v2]) self.assertIs(None, s.get_shape().ndims) # All inputs known and same. - v1 = variables.Variable([[1, 2]]) - v2 = variables.Variable([[1, 2]]) + v1 = variables.VariableV1([[1, 2]]) + v2 = variables.VariableV1([[1, 2]]) s = control_flow_ops.ref_select(index, [v1, v2]) self.assertEqual([1, 2], s.get_shape()) # Possibly the same but not guaranteed. 
- v1 = variables.Variable([[1., 2.]]) + v1 = variables.VariableV1([[1., 2.]]) p2 = array_ops.placeholder(dtypes.float32, shape=[None, 2]) - v2 = variables.Variable(p2, validate_shape=False) + v2 = variables.VariableV1(p2, validate_shape=False) s = control_flow_ops.ref_select(index, [v1, v2]) self.assertEqual(None, s.get_shape()) @@ -3160,11 +3160,11 @@ class TupleTest(test.TestCase): def testTensors(self): for v1_first in [True, False]: with self.cached_session(): - v1 = variables.Variable([1.0]) + v1 = variables.VariableV1([1.0]) add1 = math_ops.add( control_flow_ops.with_dependencies([v1.initializer], v1._ref()), # pylint: disable=protected-access 2.0) - v2 = variables.Variable([10.0]) + v2 = variables.VariableV1([10.0]) add2 = math_ops.add( control_flow_ops.with_dependencies([v2.initializer], v2._ref()), # pylint: disable=protected-access 20.0) @@ -3190,14 +3190,14 @@ class TupleTest(test.TestCase): def testIndexedSlices(self): for v1_first in [True, False]: with self.cached_session(): - v1 = variables.Variable( + v1 = variables.VariableV1( np.array([[0.0, 1.0], [10.0, 11.0], [20.0, 21.0]]).astype( np.float32)) v1_at_1 = ops.IndexedSlices( control_flow_ops.with_dependencies([v1.initializer], v1._ref()), # pylint: disable=protected-access constant_op.constant([1])) - v2 = variables.Variable( + v2 = variables.VariableV1( np.array([[0.1, 1.1], [10.1, 11.1], [20.1, 21.1]]).astype( np.float32)) v2_at_1 = ops.IndexedSlices( @@ -3229,7 +3229,7 @@ class TupleTest(test.TestCase): def testAcceptTensorsAsControlInputs(self): with self.cached_session(): - var = variables.Variable(0) + var = variables.VariableV1(0) assign = state_ops.assign(var, 1) t, = control_flow_ops.tuple( [constant_op.constant(0)], control_inputs=[assign]) diff --git a/tensorflow/python/kernel_tests/dense_update_ops_test.py b/tensorflow/python/kernel_tests/dense_update_ops_test.py index 06c3271850..120e10314f 100644 --- a/tensorflow/python/kernel_tests/dense_update_ops_test.py +++ 
b/tensorflow/python/kernel_tests/dense_update_ops_test.py @@ -87,7 +87,7 @@ class AssignOpTest(test.TestCase): def testAssignNonStrictShapeChecking(self): with self.cached_session(): data = array_ops.fill([1024, 1024], 0) - p = variables.Variable([1]) + p = variables.VariableV1([1]) a = state_ops.assign(p, data, validate_shape=False) a.op.run() self.assertAllEqual(p.eval(), data.eval()) @@ -100,14 +100,14 @@ class AssignOpTest(test.TestCase): def testInitRequiredAssignAdd(self): with self.cached_session(): - p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32) + p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32) a = state_ops.assign_add(p, array_ops.fill([1024, 1024], 0)) with self.assertRaisesOpError("use uninitialized"): a.op.run() def testInitRequiredAssignSub(self): with self.cached_session(): - p = variables.Variable(array_ops.fill([1024, 1024], 1), dtypes.int32) + p = variables.VariableV1(array_ops.fill([1024, 1024], 1), dtypes.int32) a = state_ops.assign_sub(p, array_ops.fill([1024, 1024], 0)) with self.assertRaisesOpError("use uninitialized"): a.op.run() diff --git a/tensorflow/python/kernel_tests/identity_op_py_test.py b/tensorflow/python/kernel_tests/identity_op_py_test.py index 37f9f716f8..88ea10c22a 100644 --- a/tensorflow/python/kernel_tests/identity_op_py_test.py +++ b/tensorflow/python/kernel_tests/identity_op_py_test.py @@ -61,7 +61,7 @@ class IdentityOpTest(test.TestCase): def testRefIdentityShape(self): with self.cached_session(): shape = [2, 3] - tensor = variables.Variable( + tensor = variables.VariableV1( constant_op.constant( [[1, 2, 3], [6, 5, 4]], dtype=dtypes.int32)) self.assertEquals(shape, tensor.get_shape()) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index f90545f84c..1365d4b240 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ 
b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -290,7 +290,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(self.evaluate(read), [[2]]) def testUseResource(self): - v = variables.Variable(1.0, use_resource=True) + v = variables.VariableV1(1.0, use_resource=True) self.assertTrue(isinstance(v, resource_variable_ops.ResourceVariable)) def testEagerNoUseResource(self): diff --git a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py index 86e063cb36..4b92309e4d 100644 --- a/tensorflow/python/kernel_tests/scatter_nd_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_nd_ops_test.py @@ -136,7 +136,7 @@ class StatefulScatterNdTest(test.TestCase): new = ref.copy() np_scatter(new, indices, updates) # Scatter via tensorflow - ref_var = variables.Variable(ref) + ref_var = variables.VariableV1(ref) ref_var.initializer.run() tf_scatter(ref_var, indices, updates).eval() @@ -258,7 +258,7 @@ class StatefulScatterNdTest(test.TestCase): params = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32) updates = np.array([-3, -4, -5]).astype(np.float32) with self.test_session(use_gpu=False): - ref = variables.Variable(params) + ref = variables.VariableV1(params) ref.initializer.run() # Indices all in range, no problem. 
diff --git a/tensorflow/python/kernel_tests/scatter_ops_test.py b/tensorflow/python/kernel_tests/scatter_ops_test.py index 1a0fa744ae..527b7daf10 100644 --- a/tensorflow/python/kernel_tests/scatter_ops_test.py +++ b/tensorflow/python/kernel_tests/scatter_ops_test.py @@ -178,7 +178,7 @@ class ScatterTest(test.TestCase): np_scatter = _TF_OPS_TO_NUMPY[tf_scatter] np_scatter(new, indices, updates) # Scatter via tensorflow - ref = variables.Variable(old) + ref = variables.VariableV1(old) ref.initializer.run() tf_scatter(ref, indices, updates).eval() self.assertAllClose(ref.eval(), new) @@ -294,7 +294,7 @@ class ScatterTest(test.TestCase): updates = np.array([-3, -4, -5]).astype(np.float32) if not test.is_gpu_available(): with self.test_session(use_gpu=False): - ref = variables.Variable(params) + ref = variables.VariableV1(params) ref.initializer.run() # Indices all in range, no problem. diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 401e1ae102..33f464fb90 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -394,10 +394,10 @@ class VariableScopeTest(test.TestCase): old = variable_scope._DEFAULT_USE_RESOURCE try: variable_scope.enable_resource_variables() - self.assertTrue(isinstance(variables_lib.Variable(1.0), + self.assertTrue(isinstance(variables_lib.VariableV1(1.0), resource_variable_ops.ResourceVariable)) variable_scope.disable_resource_variables() - self.assertFalse(isinstance(variables_lib.Variable(1.0), + self.assertFalse(isinstance(variables_lib.VariableV1(1.0), resource_variable_ops.ResourceVariable)) finally: variable_scope._DEFAULT_USE_RESOURCE = old diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index 2e7975667c..942ceedc8b 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ 
b/tensorflow/python/kernel_tests/variables_test.py @@ -43,14 +43,14 @@ class VariablesTestCase(test.TestCase): def testInitialization(self): with self.cached_session(): - var0 = variables.Variable(0.0) + var0 = variables.VariableV1(0.0) self.assertEqual("Variable:0", var0.name) self.assertEqual("Variable", var0._shared_name) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.get_shape()) self.assertEqual([], var0.shape) - var1 = variables.Variable(1.1) + var1 = variables.VariableV1(1.1) self.assertEqual("Variable_1:0", var1.name) self.assertEqual("Variable_1", var1._shared_name) self.assertEqual([], var1.get_shape()) @@ -143,7 +143,7 @@ class VariablesTestCase(test.TestCase): def testZeroSizeStringAssign(self): with self.cached_session() as sess: - array = variables.Variable( + array = variables.VariableV1( initial_value=array_ops.zeros((0,), dtype=dtypes.string), name="foo", trainable=False, @@ -192,7 +192,7 @@ class VariablesTestCase(test.TestCase): # d get the control dep. d = constant_op.constant(2.0) # variables do not. 
- var_x = variables.Variable(2.0) + var_x = variables.VariableV1(2.0) self.assertEqual([c.op], d.op.control_inputs) self.assertEqual([], var_x.initializer.control_inputs) self.assertEqual([], var_x.value().op.control_inputs) @@ -280,10 +280,10 @@ class VariablesTestCase(test.TestCase): def testCollections(self): with self.cached_session(): - var_x = variables.Variable(2.0) - var_y = variables.Variable(2.0, trainable=False) - var_z = variables.Variable(2.0, trainable=True) - var_t = variables.Variable( + var_x = variables.VariableV1(2.0) + var_y = variables.VariableV1(2.0, trainable=False) + var_z = variables.VariableV1(2.0, trainable=True) + var_t = variables.VariableV1( 2.0, trainable=True, collections=[ @@ -296,9 +296,9 @@ class VariablesTestCase(test.TestCase): def testCollectionsWithScope(self): with self.cached_session(): with ops.name_scope("scope_1"): - var_x = variables.Variable(2.0) + var_x = variables.VariableV1(2.0) with ops.name_scope("scope_2"): - var_y = variables.Variable(2.0) + var_y = variables.VariableV1(2.0) self.assertEqual([var_x, var_y], variables.global_variables()) self.assertEqual([var_x], variables.global_variables("scope_1")) @@ -399,7 +399,7 @@ class VariablesTestCase(test.TestCase): def testColocation(self): with ops.device("/job:ps"): - var = variables.Variable(0, name="v") + var = variables.VariableV1(0, name="v") with ops.device("/job:worker/task:7"): assign_op = var.assign(1) self.assertDeviceEqual("/job:ps", assign_op.device) @@ -522,7 +522,7 @@ class VariablesTestCase(test.TestCase): self.assertAllClose(np.ones((5, 5), np.float32), var.eval()) def testRepr(self): - var = variables.Variable(np.zeros((5, 5), np.float32), name="noop") + var = variables.VariableV1(np.zeros((5, 5), np.float32), name="noop") self.assertEqual( "", repr(var)) @@ -556,8 +556,8 @@ class IsInitializedTest(test.TestCase): def testVariableList(self): with ops.Graph().as_default(), self.cached_session() as sess: - v = variables.Variable([1, 2], name="v") - w = 
variables.Variable([3, 4], name="w") + v = variables.VariableV1([1, 2], name="v") + w = variables.VariableV1([3, 4], name="w") uninited = variables.report_uninitialized_variables() self.assertAllEqual(np.array([b"v", b"w"]), sess.run(uninited)) sess.run(w.initializer) @@ -593,8 +593,8 @@ class ObsoleteIsInitializedTest(test.TestCase): def testVariables(self): with ops.Graph().as_default(), self.cached_session() as sess: - v = variables.Variable([1, 2]) - w = variables.Variable([3, 4]) + v = variables.VariableV1([1, 2]) + w = variables.VariableV1([3, 4]) _ = v, w inited = variables.assert_variables_initialized() with self.assertRaisesOpError("Attempting to use uninitialized value"): @@ -604,8 +604,8 @@ class ObsoleteIsInitializedTest(test.TestCase): def testVariableList(self): with ops.Graph().as_default(), self.cached_session() as sess: - v = variables.Variable([1, 2]) - w = variables.Variable([3, 4]) + v = variables.VariableV1([1, 2]) + w = variables.VariableV1([3, 4]) inited = variables.assert_variables_initialized([v]) with self.assertRaisesOpError("Attempting to use uninitialized value"): inited.op.run() diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index 4f6e5dc473..3c9b7a01c7 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -273,7 +273,7 @@ class GradientsTest(test_util.TensorFlowTestCase): def testVariableRefGradient(self): with ops.Graph().as_default(): init = constant_op.constant(100.0) - var = variables.Variable(init) + var = variables.VariableV1(init) gradient = gradients.gradients(var._ref(), var) self.assertIsNotNone(gradient) diff --git a/tensorflow/python/ops/matmul_benchmark.py b/tensorflow/python/ops/matmul_benchmark.py index 6e5fe74290..138149e63d 100644 --- a/tensorflow/python/ops/matmul_benchmark.py +++ b/tensorflow/python/ops/matmul_benchmark.py @@ -49,13 +49,13 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype): """ with 
ops.device('%s' % device): if not transpose_a: - x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype)) + x = variables.VariableV1(random_ops.random_uniform([n, m], dtype=dtype)) else: - x = variables.Variable(random_ops.random_uniform([m, n], dtype=dtype)) + x = variables.VariableV1(random_ops.random_uniform([m, n], dtype=dtype)) if not transpose_b: - y = variables.Variable(random_ops.random_uniform([m, k], dtype=dtype)) + y = variables.VariableV1(random_ops.random_uniform([m, k], dtype=dtype)) else: - y = variables.Variable(random_ops.random_uniform([k, m], dtype=dtype)) + y = variables.VariableV1(random_ops.random_uniform([k, m], dtype=dtype)) z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b) return control_flow_ops.group(z) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 562e1ad6cb..af5c7d4050 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -198,7 +198,7 @@ VariableSynchronization = variables.VariableSynchronization # pylint: disable=i VariableAggregation = variables.VariableAggregation # pylint: disable=invalid-name AUTO_REUSE = _ReuseMode.AUTO_REUSE -tf_export("AUTO_REUSE").export_constant(__name__, "AUTO_REUSE") +tf_export(v1=["AUTO_REUSE"]).export_constant(__name__, "AUTO_REUSE") AUTO_REUSE.__doc__ = """ When passed in as the value for the `reuse` flag, AUTO_REUSE indicates that get_variable() should create the requested variable if it doesn't exist or, if @@ -908,7 +908,7 @@ class _VariableStore(object): if use_resource is None: # Set the default value if unspecified. use_resource = _DEFAULT_USE_RESOURCE - v = variable( + v = variables.VariableV1( initial_value=init_val, name=name, trainable=trainable, @@ -994,7 +994,7 @@ def no_regularizer(_): # TODO(alive): support caching devices and partitioned variables in Eager mode. 
-@tf_export("VariableScope") +@tf_export(v1=["VariableScope"]) class VariableScope(object): """Variable scope object to carry defaults to provide to `get_variable`. @@ -1342,7 +1342,7 @@ def get_variable_scope_store(): return scope_store -@tf_export("get_variable_scope") +@tf_export(v1=["get_variable_scope"]) def get_variable_scope(): """Returns the current variable scope.""" return get_variable_scope_store().current_scope @@ -1451,7 +1451,7 @@ class EagerVariableStore(object): # The argument list for get_variable must match arguments to get_local_variable. # So, if you are updating the arguments, also update arguments to # get_local_variable below. -@tf_export("get_variable") +@tf_export(v1=["get_variable"]) def get_variable(name, shape=None, dtype=None, @@ -1596,7 +1596,7 @@ get_variable.__doc__ = get_variable_or_local_docstring % ( # The argument list for get_local_variable must match arguments to get_variable. # So, if you are updating the arguments, also update arguments to get_variable. -@tf_export("get_local_variable") +@tf_export(v1=["get_local_variable"]) def get_local_variable( # pylint: disable=missing-docstring name, shape=None, @@ -1941,7 +1941,7 @@ def _get_unique_variable_scope(prefix): # Named like a function for backwards compatibility with the # @tf_contextlib.contextmanager version, which was switched to a class to avoid # some object creation overhead. -@tf_export("variable_scope") # pylint: disable=invalid-name +@tf_export(v1=["variable_scope"]) # pylint: disable=invalid-name class variable_scope(object): """A context manager for defining ops that creates variables (layers). 
@@ -2322,7 +2322,7 @@ class variable_scope(object): # pylint: disable=g-doc-return-or-yield -@tf_export("variable_op_scope") +@tf_export(v1=["variable_op_scope"]) @tf_contextlib.contextmanager def variable_op_scope(values, name_or_scope, @@ -2443,7 +2443,33 @@ def default_variable_creator(next_creator=None, **kwargs): expected_shape=expected_shape, import_scope=import_scope) +def default_variable_creator_v2(next_creator=None, **kwargs): + """Default variable creator.""" + assert next_creator is None + initial_value = kwargs.get("initial_value", None) + trainable = kwargs.get("trainable", None) + validate_shape = kwargs.get("validate_shape", True) + caching_device = kwargs.get("caching_device", None) + name = kwargs.get("name", None) + variable_def = kwargs.get("variable_def", None) + dtype = kwargs.get("dtype", None) + import_scope = kwargs.get("import_scope", None) + constraint = kwargs.get("constraint", None) + + # Set trainable value based on synchronization value. + synchronization = kwargs.get("synchronization", VariableSynchronization.AUTO) + trainable = _get_trainable_value( + synchronization=synchronization, trainable=trainable) + + return resource_variable_ops.ResourceVariable( + initial_value=initial_value, trainable=trainable, + validate_shape=validate_shape, caching_device=caching_device, + name=name, dtype=dtype, constraint=constraint, variable_def=variable_def, + import_scope=import_scope) + + variables.default_variable_creator = default_variable_creator +variables.default_variable_creator_v2 = default_variable_creator_v2 def _make_getter(captured_getter, captured_previous): @@ -2452,11 +2478,12 @@ def _make_getter(captured_getter, captured_previous): # TODO(apassos) remove forwarding symbol -variable = variables.Variable +variable = variables.VariableV1 +@tf_export(v1=["variable_creator_scope"]) @tf_contextlib.contextmanager -def variable_creator_scope(variable_creator): +def variable_creator_scope_v1(variable_creator): """Scope which defines a 
variable creation function to be used by variable(). variable_creator is expected to be a function with the following signature: @@ -2527,3 +2554,73 @@ def variable_creator_scope(variable_creator): """ with ops.get_default_graph()._variable_creator_scope(variable_creator): # pylint: disable=protected-access yield + + +# Note: only the docstrings differ between this and v1. +@tf_export(v2=["variable_creator_scope"]) +@tf_contextlib.contextmanager +def variable_creator_scope(variable_creator): + """Scope which defines a variable creation function to be used by variable(). + + variable_creator is expected to be a function with the following signature: + + ``` + def variable_creator(next_creator, **kwargs) + ``` + + The creator is supposed to eventually call the next_creator to create a + variable if it does want to create a variable and not call Variable or + ResourceVariable directly. This helps make creators composable. A creator may + choose to create multiple variables, return already existing variables, or + simply register that a variable was created and defer to the next creators in + line. Creators can also modify the keyword arguments seen by the next + creators. + + Custom getters in the variable scope will eventually resolve down to these + custom creators when they do create variables. + + The valid keyword arguments in kwds are: + initial_value: A `Tensor`, or Python object convertible to a `Tensor`, + which is the initial value for the Variable. The initial value must have + a shape specified unless `validate_shape` is set to False. Can also be a + callable with no argument that returns the initial value when called. In + that case, `dtype` must be specified. (Note that initializer functions + from init_ops.py must first be bound to a shape before being used here.) + trainable: If `True`, the default, GradientTapes automatically watch + uses of this Variable. 
+ validate_shape: If `False`, allows the variable to be initialized with a + value of unknown shape. If `True`, the default, the shape of + `initial_value` must be known. + caching_device: Optional device string describing where the Variable + should be cached for reading. Defaults to the Variable's device. + If not `None`, caches on another device. Typical use is to cache + on the device where the Ops using the Variable reside, to deduplicate + copying through `Switch` and other conditional statements. + name: Optional name for the variable. Defaults to `'Variable'` and gets + uniquified automatically. + dtype: If set, initial_value will be converted to the given type. + If `None`, either the datatype will be kept (if `initial_value` is + a Tensor), or `convert_to_tensor` will decide. + constraint: A constraint function to be applied to the variable after + updates by some algorithms. + synchronization: Indicates when a distributed variable will be + aggregated. Accepted values are constants defined in the class + `tf.VariableSynchronization`. By default the synchronization is set to + `AUTO` and the current `DistributionStrategy` chooses + when to synchronize. If `synchronization` is set to `ON_READ`, + `trainable` must not be set to `True`. + aggregation: Indicates how a distributed variable will be aggregated. + Accepted values are constants defined in the class + `tf.VariableAggregation`. + + This set may grow over time, so it's important the signature of creators is as + mentioned above.
+ + Args: + variable_creator: the passed creator + + Yields: + A scope in which the creator is active + """ + with ops.get_default_graph()._variable_creator_scope(variable_creator): # pylint: disable=protected-access + yield diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 7a46157739..8da1e9fe56 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -46,6 +46,11 @@ def default_variable_creator(_, **kwds): raise NotImplementedError("variable_scope needs to be imported") +def default_variable_creator_v2(_, **kwds): + del kwds + raise NotImplementedError("variable_scope needs to be imported") + + def _make_getter(captured_getter, captured_previous): """To avoid capturing loop variables.""" def getter(**kwargs): @@ -101,21 +106,21 @@ class VariableAggregation(enum.Enum): class VariableMetaclass(type): """Metaclass to allow construction of tf.Variable to be overridden.""" - def _variable_call(cls, - initial_value=None, - trainable=None, - collections=None, - validate_shape=True, - caching_device=None, - name=None, - variable_def=None, - dtype=None, - expected_shape=None, - import_scope=None, - constraint=None, - use_resource=None, - synchronization=VariableSynchronization.AUTO, - aggregation=VariableAggregation.NONE): + def _variable_v1_call(cls, + initial_value=None, + trainable=None, + collections=None, + validate_shape=True, + caching_device=None, + name=None, + variable_def=None, + dtype=None, + expected_shape=None, + import_scope=None, + constraint=None, + use_resource=None, + synchronization=VariableSynchronization.AUTO, + aggregation=VariableAggregation.NONE): """Call on Variable class. 
Useful to force the signature.""" previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs) for getter in ops.get_default_graph()._variable_creator_stack: # pylint: disable=protected-access @@ -140,14 +145,49 @@ class VariableMetaclass(type): synchronization=synchronization, aggregation=aggregation) + def _variable_v2_call(cls, + initial_value=None, + trainable=None, + validate_shape=True, + caching_device=None, + name=None, + variable_def=None, + dtype=None, + import_scope=None, + constraint=None, + synchronization=VariableSynchronization.AUTO, + aggregation=VariableAggregation.NONE): + """Call on Variable class. Useful to force the signature.""" + previous_getter = lambda **kws: default_variable_creator_v2(None, **kws) + for getter in ops.get_default_graph()._variable_creator_stack: # pylint: disable=protected-access + previous_getter = _make_getter(getter, previous_getter) + + # Reset `aggregation` that is explicitly set as `None` to the enum NONE. + if aggregation is None: + aggregation = VariableAggregation.NONE + return previous_getter( + initial_value=initial_value, + trainable=trainable, + validate_shape=validate_shape, + caching_device=caching_device, + name=name, + variable_def=variable_def, + dtype=dtype, + import_scope=import_scope, + constraint=constraint, + synchronization=synchronization, + aggregation=aggregation) + def __call__(cls, *args, **kwargs): - if cls is Variable: - return cls._variable_call(*args, **kwargs) + if cls is VariableV1: + return cls._variable_v1_call(*args, **kwargs) + elif cls is Variable: + return cls._variable_v2_call(*args, **kwargs) else: return super(VariableMetaclass, cls).__call__(*args, **kwargs) -@tf_export("Variable") +@tf_export(v2=["Variable"]) class Variable(six.with_metaclass(VariableMetaclass, checkpointable.CheckpointableBase)): """See the [Variables Guide](https://tensorflow.org/guide/variables). 
@@ -267,16 +307,13 @@ class Variable(six.with_metaclass(VariableMetaclass, def __init__(self, initial_value=None, trainable=True, - collections=None, validate_shape=True, caching_device=None, name=None, variable_def=None, dtype=None, - expected_shape=None, import_scope=None, constraint=None, - use_resource=None, synchronization=VariableSynchronization.AUTO, aggregation=VariableAggregation.NONE): """Creates a new variable with value `initial_value`. @@ -297,11 +334,8 @@ class Variable(six.with_metaclass(VariableMetaclass, callable with no argument that returns the initial value when called. In that case, `dtype` must be specified. (Note that initializer functions from init_ops.py must first be bound to a shape before being used here.) - trainable: If `True`, the default, also adds the variable to the graph - collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as - the default list of variables to use by the `Optimizer` classes. - collections: List of graph collections keys. The new variable is added to - these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. + trainable: If `True`, the default, GradientTapes automatically watch uses + of this variable. validate_shape: If `False`, allows the variable to be initialized with a value of unknown shape. If `True`, the default, the shape of `initial_value` must be known. @@ -319,8 +353,6 @@ class Variable(six.with_metaclass(VariableMetaclass, dtype: If set, initial_value will be converted to the given type. If `None`, either the datatype will be kept (if `initial_value` is a Tensor), or `convert_to_tensor` will decide. - expected_shape: A TensorShape. If set, initial_value is expected - to have this shape. import_scope: Optional `string`. Name scope to add to the `Variable.` Only used when initializing from protocol buffer. 
constraint: An optional projection function to be applied to the variable @@ -330,9 +362,6 @@ class Variable(six.with_metaclass(VariableMetaclass, variable and return the Tensor for the projected value (which must have the same shape). Constraints are not safe to use when doing asynchronous distributed training. - use_resource: if True, a ResourceVariable is created; otherwise an - old-style ref-based variable is created. When eager execution is enabled - a resource variable is always created. synchronization: Indicates when a distributed a variable will be aggregated. Accepted values are constants defined in the class `tf.VariableSynchronization`. By default the synchronization is set to @@ -1009,11 +1038,207 @@ class Variable(six.with_metaclass(VariableMetaclass, raise NotImplementedError +@tf_export(v1=["Variable"]) +class VariableV1(Variable): + """See the [Variables Guide](https://tensorflow.org/guide/variables). + + A variable maintains state in the graph across calls to `run()`. You add a + variable to the graph by constructing an instance of the class `Variable`. + + The `Variable()` constructor requires an initial value for the variable, + which can be a `Tensor` of any type and shape. The initial value defines the + type and shape of the variable. After construction, the type and shape of + the variable are fixed. The value can be changed using one of the assign + methods. + + If you want to change the shape of a variable later you have to use an + `assign` Op with `validate_shape=False`. + + Just like any `Tensor`, variables created with `Variable()` can be used as + inputs for other Ops in the graph. Additionally, all the operators + overloaded for the `Tensor` class are carried over to variables, so you can + also add nodes to the graph by just doing arithmetic on variables. + + ```python + import tensorflow as tf + + # Create a variable. + w = tf.Variable(, name=) + + # Use the variable in the graph like any Tensor. 
+ y = tf.matmul(w, ...another variable or tensor...) + + # The overloaded operators are available too. + z = tf.sigmoid(w + y) + + # Assign a new value to the variable with `assign()` or a related method. + w.assign(w + 1.0) + w.assign_add(1.0) + ``` + + When you launch the graph, variables have to be explicitly initialized before + you can run Ops that use their value. You can initialize a variable by + running its *initializer op*, restoring the variable from a save file, or + simply running an `assign` Op that assigns a value to the variable. In fact, + the variable *initializer op* is just an `assign` Op that assigns the + variable's initial value to the variable itself. + + ```python + # Launch the graph in a session. + with tf.Session() as sess: + # Run the variable initializer. + sess.run(w.initializer) + # ...you now can run ops that use the value of 'w'... + ``` + + The most common initialization pattern is to use the convenience function + `global_variables_initializer()` to add an Op to the graph that initializes + all the variables. You then run that Op after launching the graph. + + ```python + # Add an Op to initialize global variables. + init_op = tf.global_variables_initializer() + + # Launch the graph in a session. + with tf.Session() as sess: + # Run the Op that initializes global variables. + sess.run(init_op) + # ...you can now run any Op that uses variable values... + ``` + + If you need to create a variable with an initial value dependent on another + variable, use the other variable's `initialized_value()`. This ensures that + variables are initialized in the right order. + + All variables are automatically collected in the graph where they are + created. By default, the constructor adds the new variable to the graph + collection `GraphKeys.GLOBAL_VARIABLES`. The convenience function + `global_variables()` returns the contents of that collection. 
+ + When building a machine learning model it is often convenient to distinguish + between variables holding the trainable model parameters and other variables + such as a `global step` variable used to count training steps. To make this + easier, the variable constructor supports a `trainable=` parameter. If + `True`, the new variable is also added to the graph collection + `GraphKeys.TRAINABLE_VARIABLES`. The convenience function + `trainable_variables()` returns the contents of this collection. The + various `Optimizer` classes use this collection as the default list of + variables to optimize. + + WARNING: tf.Variable objects by default have a non-intuitive memory model. A + Variable is represented internally as a mutable Tensor which can + non-deterministically alias other Tensors in a graph. The set of operations + which consume a Variable and can lead to aliasing is undetermined and can + change across TensorFlow versions. Avoid writing code which relies on the + value of a Variable either changing or not changing as other operations + happen. For example, using Variable objects or simple functions thereof as + predicates in a `tf.cond` is dangerous and error-prone: + + ``` + v = tf.Variable(True) + tf.cond(v, lambda: v.assign(False), my_false_fn) # Note: this is broken. + ``` + + Here, adding `use_resource=True` when constructing the variable will + fix any nondeterminism issues: + ``` + v = tf.Variable(True, use_resource=True) + tf.cond(v, lambda: v.assign(False), my_false_fn) + ``` + + To use the replacement for variables which does + not have these issues: + + * Add `use_resource=True` when constructing `tf.Variable`; + * Call `tf.get_variable_scope().set_use_resource(True)` inside a + `tf.variable_scope` before the `tf.get_variable()` call.
+ """ + + def __init__(self, # pylint: disable=super-init-not-called + initial_value=None, + trainable=True, + collections=None, + validate_shape=True, + caching_device=None, + name=None, + variable_def=None, + dtype=None, + expected_shape=None, + import_scope=None, + constraint=None, + use_resource=None, + synchronization=VariableSynchronization.AUTO, + aggregation=VariableAggregation.NONE): + """Creates a new variable with value `initial_value`. + + The new variable is added to the graph collections listed in `collections`, + which defaults to `[GraphKeys.GLOBAL_VARIABLES]`. + + If `trainable` is `True` the variable is also added to the graph collection + `GraphKeys.TRAINABLE_VARIABLES`. + + This constructor creates both a `variable` Op and an `assign` Op to set the + variable to its initial value. + + Args: + initial_value: A `Tensor`, or Python object convertible to a `Tensor`, + which is the initial value for the Variable. The initial value must have + a shape specified unless `validate_shape` is set to False. Can also be a + callable with no argument that returns the initial value when called. In + that case, `dtype` must be specified. (Note that initializer functions + from init_ops.py must first be bound to a shape before being used here.) + trainable: If `True`, the default, also adds the variable to the graph + collection `GraphKeys.TRAINABLE_VARIABLES`. This collection is used as + the default list of variables to use by the `Optimizer` classes. + collections: List of graph collections keys. The new variable is added to + these collections. Defaults to `[GraphKeys.GLOBAL_VARIABLES]`. + validate_shape: If `False`, allows the variable to be initialized with a + value of unknown shape. If `True`, the default, the shape of + `initial_value` must be known. + caching_device: Optional device string describing where the Variable + should be cached for reading. Defaults to the Variable's device. + If not `None`, caches on another device. 
Typical use is to cache + on the device where the Ops using the Variable reside, to deduplicate + copying through `Switch` and other conditional statements. + name: Optional name for the variable. Defaults to `'Variable'` and gets + uniquified automatically. + variable_def: `VariableDef` protocol buffer. If not `None`, recreates + the Variable object with its contents, referencing the variable's nodes + in the graph, which must already exist. The graph is not changed. + `variable_def` and the other arguments are mutually exclusive. + dtype: If set, initial_value will be converted to the given type. + If `None`, either the datatype will be kept (if `initial_value` is + a Tensor), or `convert_to_tensor` will decide. + expected_shape: A TensorShape. If set, initial_value is expected + to have this shape. + import_scope: Optional `string`. Name scope to add to the + `Variable.` Only used when initializing from protocol buffer. + constraint: An optional projection function to be applied to the variable + after being updated by an `Optimizer` (e.g. used to implement norm + constraints or value constraints for layer weights). The function must + take as input the unprojected Tensor representing the value of the + variable and return the Tensor for the projected value + (which must have the same shape). Constraints are not safe to + use when doing asynchronous distributed training. + use_resource: whether to use resource variables. + synchronization: unused + aggregation: unused + + Raises: + ValueError: If both `variable_def` and initial_value are specified. + ValueError: If the initial value is not specified, or does not have a + shape and `validate_shape` is `True`. + RuntimeError: If eager execution is enabled. 
+ """ + + SaveSliceInfo = Variable.SaveSliceInfo + + # TODO(apassos): do not repeat all comments here -class RefVariable(Variable): +class RefVariable(VariableV1): """Ref-based implementation of variables.""" - def __init__(self, + def __init__(self, # pylint: disable=super-init-not-called initial_value=None, trainable=True, collections=None, @@ -1873,7 +2098,7 @@ class RefVariable(Variable): def _OverloadAllOperators(): # pylint: disable=invalid-name """Register overloads for all operators.""" for operator in ops.Tensor.OVERLOADABLE_OPERATORS: - Variable._OverloadOperator(operator) + Variable._OverloadOperator(operator) # pylint: disable=protected-access # For slicing, bind getitem differently than a tensor (use SliceHelperVar # instead) # pylint: disable=protected-access @@ -2401,7 +2626,7 @@ class PartitionedVariable(object): "assign() has not been implemented for PartitionedVariable.") -@tf_export("global_variables") +@tf_export(v1=["global_variables"]) def global_variables(scope=None): """Returns global variables. @@ -2427,7 +2652,7 @@ def global_variables(scope=None): return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, scope) -@tf_export("all_variables") +@tf_export(v1=["all_variables"]) @deprecated("2017-03-02", "Please use tf.global_variables instead.") def all_variables(): """See `tf.global_variables`.""" @@ -2452,7 +2677,7 @@ def _all_saveable_objects(scope=None): ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS, scope)) -@tf_export("local_variables") +@tf_export(v1=["local_variables"]) def local_variables(scope=None): """Returns local variables. @@ -2480,7 +2705,7 @@ def local_variables(scope=None): return ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES, scope) -@tf_export("model_variables") +@tf_export(v1=["model_variables"]) def model_variables(scope=None): """Returns all variables in the MODEL_VARIABLES collection. 
@@ -2497,7 +2722,7 @@ def model_variables(scope=None): return ops.get_collection(ops.GraphKeys.MODEL_VARIABLES, scope) -@tf_export("trainable_variables") +@tf_export(v1=["trainable_variables"]) def trainable_variables(scope=None): """Returns all variables created with `trainable=True`. @@ -2519,7 +2744,7 @@ def trainable_variables(scope=None): return ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES, scope) -@tf_export("moving_average_variables") +@tf_export(v1=["moving_average_variables"]) def moving_average_variables(scope=None): """Returns all variables that maintain their moving averages. @@ -2541,7 +2766,7 @@ def moving_average_variables(scope=None): return ops.get_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, scope) -@tf_export("initializers.variables", "variables_initializer") +@tf_export(v1=["initializers.variables", "variables_initializer"]) def variables_initializer(var_list, name="init"): """Returns an Op that initializes a list of variables. @@ -2567,7 +2792,7 @@ def variables_initializer(var_list, name="init"): return control_flow_ops.no_op(name=name) -@tf_export("initialize_variables") +@tf_export(v1=["initialize_variables"]) @tf_should_use.should_use_result @deprecated("2017-03-02", "Use `tf.variables_initializer` instead.") def initialize_variables(var_list, name="init"): @@ -2575,7 +2800,7 @@ def initialize_variables(var_list, name="init"): return variables_initializer(var_list, name=name) -@tf_export("initializers.global_variables", "global_variables_initializer") +@tf_export(v1=["initializers.global_variables", "global_variables_initializer"]) def global_variables_initializer(): """Returns an Op that initializes global variables. 
@@ -2589,7 +2814,7 @@ def global_variables_initializer(): return variables_initializer(global_variables()) -@tf_export("initialize_all_variables") +@tf_export(v1=["initialize_all_variables"]) @tf_should_use.should_use_result @deprecated("2017-03-02", "Use `tf.global_variables_initializer` instead.") def initialize_all_variables(): @@ -2597,7 +2822,7 @@ def initialize_all_variables(): return global_variables_initializer() -@tf_export("initializers.local_variables", "local_variables_initializer") +@tf_export(v1=["initializers.local_variables", "local_variables_initializer"]) def local_variables_initializer(): """Returns an Op that initializes all local variables. @@ -2611,7 +2836,7 @@ def local_variables_initializer(): return variables_initializer(local_variables()) -@tf_export("initialize_local_variables") +@tf_export(v1=["initialize_local_variables"]) @tf_should_use.should_use_result @deprecated("2017-03-02", "Use `tf.local_variables_initializer` instead.") def initialize_local_variables(): @@ -2619,7 +2844,7 @@ def initialize_local_variables(): return local_variables_initializer() -@tf_export("is_variable_initialized") +@tf_export(v1=["is_variable_initialized"]) @tf_should_use.should_use_result def is_variable_initialized(variable): """Tests if a variable has been initialized. @@ -2634,7 +2859,7 @@ def is_variable_initialized(variable): return state_ops.is_variable_initialized(variable) -@tf_export("assert_variables_initialized") +@tf_export(v1=["assert_variables_initialized"]) @tf_should_use.should_use_result def assert_variables_initialized(var_list=None): """Returns an Op to check if variables are initialized. 
@@ -2677,7 +2902,7 @@ def assert_variables_initialized(var_list=None): return array_ops.stack(ranks) -@tf_export("report_uninitialized_variables") +@tf_export(v1=["report_uninitialized_variables"]) @tf_should_use.should_use_result def report_uninitialized_variables(var_list=None, name="report_uninitialized_variables"): diff --git a/tensorflow/python/saved_model/loader_test.py b/tensorflow/python/saved_model/loader_test.py index b7e217a35b..924b2e7c06 100644 --- a/tensorflow/python/saved_model/loader_test.py +++ b/tensorflow/python/saved_model/loader_test.py @@ -47,8 +47,8 @@ class SavedModelLoaderTest(test.TestCase): def setUp(self): """Write test SavedModels to a temp directory.""" with session.Session(graph=ops.Graph()) as sess: - x = variables.Variable(5, name="x") - y = variables.Variable(11, name="y") + x = variables.VariableV1(5, name="x") + y = variables.VariableV1(11, name="y") z = x + y sess.run(variables.global_variables_initializer()) @@ -134,8 +134,8 @@ class SavedModelLoaderTest(test.TestCase): def test_restore_variables(self): loader = loader_impl.SavedModelLoader(SAVED_MODEL_WITH_MAIN_OP) with self.session(graph=ops.Graph()) as sess: - x = variables.Variable(0, name="x") - y = variables.Variable(0, name="y") + x = variables.VariableV1(0, name="x") + y = variables.VariableV1(0, name="y") z = x * y sess.run(variables.global_variables_initializer()) @@ -186,8 +186,10 @@ class SavedModelLoaderTest(test.TestCase): """ path = _get_export_dir("no_variable_saved_model") with session.Session(graph=ops.Graph()) as sess: - x = variables.Variable(5, name="x", collections=["not_global_variable"]) - y = variables.Variable(11, name="y", collections=["not_global_variable"]) + x = variables.VariableV1( + 5, name="x", collections=["not_global_variable"]) + y = variables.VariableV1( + 11, name="y", collections=["not_global_variable"]) self.assertFalse(variables._all_saveable_objects()) z = x + y sess.run(variables.variables_initializer([x, y])) diff --git 
a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index 49d52d3bee..80b75b7ee6 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -60,7 +60,7 @@ class SavedModelTest(test.TestCase): return os.path.join(test.get_temp_dir(), label) def _init_and_validate_variable(self, sess, variable_name, variable_value): - v = variables.Variable(variable_value, name=variable_name) + v = variables.VariableV1(variable_value, name=variable_name) sess.run(variables.global_variables_initializer()) self.assertEqual(variable_value, v.eval()) @@ -458,7 +458,7 @@ class SavedModelTest(test.TestCase): # Graph with a single variable added to a collection. SavedModel invoked to: # - add with weights. with self.session(graph=ops.Graph()) as sess: - v = variables.Variable(42, name="v") + v = variables.VariableV1(42, name="v") ops.add_to_collection("foo_vars", v) sess.run(variables.global_variables_initializer()) self.assertEqual(42, v.eval()) @@ -468,7 +468,7 @@ class SavedModelTest(test.TestCase): # SavedModel invoked to: # - simply add the model (weights are not updated). with self.session(graph=ops.Graph()) as sess: - v = variables.Variable(43, name="v") + v = variables.VariableV1(43, name="v") ops.add_to_collection("bar_vars", v) sess.run(variables.global_variables_initializer()) self.assertEqual(43, v.eval()) @@ -780,13 +780,13 @@ class SavedModelTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: # Add `v1` and `v2` variables to the graph. - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") ops.add_to_collection("v", v2) # Initialize another variable `v3` to 42. 
- v3 = variables.Variable(42, name="v3") + v3 = variables.VariableV1(42, name="v3") ops.add_to_collection("v", v3) # Set up an assignment op to be run as part of the main_op. @@ -815,13 +815,13 @@ class SavedModelTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: # Add `v1` and `v2` variables to the graph. - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") ops.add_to_collection("v", v2) # Initialize another variable `v3` to 42. - v3 = variables.Variable(42, name="v3", trainable=False, collections=[]) + v3 = variables.VariableV1(42, name="v3", trainable=False, collections=[]) ops.add_to_collection("v", v3) # Set up an assignment op to be run as part of the legacy_init_op. @@ -860,11 +860,11 @@ class SavedModelTest(test.TestCase): g = ops.Graph() with self.session(graph=g) as sess: # Initialize variable `v1` to 1. - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) # Initialize another variable `v2` to 42. - v2 = variables.Variable(42, name="v2", trainable=False, collections=[]) + v2 = variables.VariableV1(42, name="v2", trainable=False, collections=[]) ops.add_to_collection("v", v2) # Set up an assignment op to be run as part of the init op. @@ -889,9 +889,9 @@ class SavedModelTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: # Add `v1` and `v2` variables to the graph. - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") ops.add_to_collection("v", v2) sess.run(variables.global_variables_initializer()) @@ -918,9 +918,9 @@ class SavedModelTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: # Add `v1` and `v2` variables to the graph. 
- v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") ops.add_to_collection("v", v2) sess.run(variables.global_variables_initializer()) @@ -947,9 +947,9 @@ class SavedModelTest(test.TestCase): with self.session(graph=ops.Graph()) as sess: # Add `v1` and `v2` variables to the graph. - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") ops.add_to_collection("v", v1) - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") ops.add_to_collection("v", v2) sess.run(variables.global_variables_initializer()) @@ -1071,13 +1071,13 @@ class SavedModelTest(test.TestCase): graph=ops.Graph(), config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v1 = variables.Variable(1, name="v1") + v1 = variables.VariableV1(1, name="v1") with sess.graph.device("/cpu:1"): - v2 = variables.Variable(2, name="v2") + v2 = variables.VariableV1(2, name="v2") # v3 is an unsaved variable derived from v1 and v2. It is used to # exercise the ability to run an init op when restoring a graph. 
- v3 = variables.Variable(1, name="v3", trainable=False, collections=[]) + v3 = variables.VariableV1(1, name="v3", trainable=False, collections=[]) assign_v3 = state_ops.assign(v3, math_ops.add(v1, v2)) init_op = control_flow_ops.group(assign_v3, name="init_op") @@ -1140,7 +1140,7 @@ class SavedModelTest(test.TestCase): builder = saved_model_builder.SavedModelBuilder(export_dir) with self.session(graph=ops.Graph()) as sess: - variables.Variable(1, name="v1") + variables.VariableV1(1, name="v1") sess.run(variables.global_variables_initializer()) custom_saver = training.Saver(name="my_saver") builder.add_meta_graph_and_variables(sess, ["tag"], saver=custom_saver) @@ -1162,7 +1162,7 @@ class SavedModelTest(test.TestCase): builder = saved_model_builder.SavedModelBuilder(export_dir) with self.session(graph=ops.Graph()) as sess: - variables.Variable(1, name="v1") + variables.VariableV1(1, name="v1") sess.run(variables.global_variables_initializer()) training.Saver(name="my_saver") builder.add_meta_graph_and_variables(sess, ["tag"]) @@ -1184,7 +1184,7 @@ class SavedModelTest(test.TestCase): builder = saved_model_builder.SavedModelBuilder(export_dir) with self.session(graph=ops.Graph()) as sess: - variables.Variable(1, name="v1") + variables.VariableV1(1, name="v1") sess.run(variables.global_variables_initializer()) builder.add_meta_graph_and_variables(sess, ["tag_0"]) @@ -1293,8 +1293,8 @@ class SavedModelTest(test.TestCase): # Add a graph with two float32 variables and a Complex Op composing them # with strip_default_attrs enabled. 
with session.Session(graph=ops.Graph()) as sess: - real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real") - imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag") + real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") + imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") sess.run(variables.global_variables_initializer()) builder.add_meta_graph_and_variables( @@ -1303,8 +1303,8 @@ class SavedModelTest(test.TestCase): # Add a graph with the same float32 variables and a Complex Op composing # them with strip_default_attrs disabled. with session.Session(graph=ops.Graph()) as sess: - real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real") - imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag") + real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") + imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") sess.run(variables.global_variables_initializer()) builder.add_meta_graph(["bar"], strip_default_attrs=False) @@ -1366,7 +1366,7 @@ class SavedModelTest(test.TestCase): # Add a graph with a single variable and a test op with a defaultless # float32 attr, "test_attr". 
with session.Session(graph=ops.Graph()) as sess: - variables.Variable(1.0, dtype=dtypes.float64, name="var") + variables.VariableV1(1.0, dtype=dtypes.float64, name="var") test_ops.test_attr(T=dtypes.float32, name="test_attr") sess.run(variables.global_variables_initializer()) builder.add_meta_graph_and_variables(sess, ["foo"]) diff --git a/tensorflow/python/tools/freeze_graph_test.py b/tensorflow/python/tools/freeze_graph_test.py index e38945fabc..5dc14a6961 100644 --- a/tensorflow/python/tools/freeze_graph_test.py +++ b/tensorflow/python/tools/freeze_graph_test.py @@ -60,7 +60,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase): # We'll create an input graph that has a single variable containing 1.0, # and that then multiplies it by 2. with ops.Graph().as_default(): - variable_node = variables.Variable(1.0, name="variable_node") + variable_node = variables.VariableV1(1.0, name="variable_node") output_node = math_ops.multiply(variable_node, 2.0, name="output_node") sess = session.Session() init = variables.global_variables_initializer() @@ -138,7 +138,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase): features = parsing_ops.parse_example(examples, feature_configs) feature = features[feature_name] - variable_node = variables.Variable(1.0, name="variable_node") + variable_node = variables.VariableV1(1.0, name="variable_node") scores = math_ops.multiply(variable_node, feature, name="output_node") class_feature = array_ops.fill(array_ops.shape(feature), "class_%s" % feature_name) @@ -174,7 +174,7 @@ class FreezeGraphTest(test_util.TensorFlowTestCase): output_graph_filename = os.path.join(tmp_dir, "output_graph.pb") with ops.Graph().as_default(): - variable_node = variables.Variable(1.0, name="variable_node") + variable_node = variables.VariableV1(1.0, name="variable_node") output_node = math_ops.multiply(variable_node, 2.0, name="output_node") sess = session.Session() init = variables.global_variables_initializer() diff --git 
a/tensorflow/python/training/checkpointable/util.py b/tensorflow/python/training/checkpointable/util.py index 56c4043d9d..eff15b24ce 100644 --- a/tensorflow/python/training/checkpointable/util.py +++ b/tensorflow/python/training/checkpointable/util.py @@ -247,7 +247,7 @@ def _default_getter(name, shape, dtype, initializer=None, def initial_value(): return initializer( shape_object.as_list(), dtype=dtype, partition_info=partition_info) - return variables.Variable( + return variables.VariableV1( initial_value=initial_value, name=name, dtype=variable_dtype, diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py index 5a9215730e..03a32f6ca0 100644 --- a/tensorflow/python/training/learning_rate_decay_test.py +++ b/tensorflow/python/training/learning_rate_decay_test.py @@ -63,7 +63,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): def testVariables(self): with self.cached_session(): - step = variables.Variable(1) + step = variables.VariableV1(1) assign_1 = step.assign(1) assign_2 = step.assign(2) assign_100 = step.assign(100) @@ -121,7 +121,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): # Test that ref types are valid. 
if not context.executing_eagerly(): - x = variables.Variable(0.0) + x = variables.VariableV1(0.0) x_ref = x.op.outputs[0] # float32_ref tensor should be accepted boundaries, values = [1.0, 2.0], [1, 2, 3] learning_rate_decay.piecewise_constant(x_ref, boundaries, values) diff --git a/tensorflow/python/training/monitored_session_test.py b/tensorflow/python/training/monitored_session_test.py index 2d7799d66a..c870d99de9 100644 --- a/tensorflow/python/training/monitored_session_test.py +++ b/tensorflow/python/training/monitored_session_test.py @@ -69,8 +69,8 @@ class ScaffoldTest(test.TestCase): def test_defaults_empty_graph(self): with ops.Graph().as_default(): scaffold = monitored_session.Scaffold() - variables.Variable(1, name='my_var') - variables.Variable( + variables.VariableV1(1, name='my_var') + variables.VariableV1( 2, name='my_local_var', collections=[ops.GraphKeys.LOCAL_VARIABLES]) scaffold.finalize() self.assertTrue(isinstance(scaffold.init_op, ops.Operation)) @@ -105,7 +105,7 @@ class ScaffoldTest(test.TestCase): def test_caches_values(self): with ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) scaffold1 = monitored_session.Scaffold() scaffold1.finalize() scaffold2 = monitored_session.Scaffold() @@ -119,7 +119,7 @@ class ScaffoldTest(test.TestCase): def test_raise_error_if_more_than_one_cached_item(self): with ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) ops.add_to_collection(ops.GraphKeys.SAVERS, saver_lib.Saver()) ops.add_to_collection(ops.GraphKeys.SAVERS, saver_lib.Saver()) with self.assertRaisesRegexp(RuntimeError, 'More than one item'): @@ -127,7 +127,7 @@ class ScaffoldTest(test.TestCase): def test_uses_passed_values(self): with ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) saver = saver_lib.Saver() scaffold = monitored_session.Scaffold( init_op=2, @@ -148,7 +148,7 @@ class ScaffoldTest(test.TestCase): def test_graph_is_finalized(self): with 
ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) monitored_session.Scaffold().finalize() with self.assertRaisesRegexp(RuntimeError, 'Graph is finalized and cannot be modified'): @@ -157,7 +157,7 @@ class ScaffoldTest(test.TestCase): def test_new_scaffold_from_default_scaffold(self): scaffold1 = monitored_session.Scaffold() with ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) saver = saver_lib.Saver() scaffold2 = monitored_session.Scaffold( init_op=2, @@ -180,7 +180,7 @@ class ScaffoldTest(test.TestCase): def test_new_scaffold_from_existing_scaffold(self): with ops.Graph().as_default(): - variables.Variable([1]) + variables.VariableV1([1]) saver = saver_lib.Saver() scaffold1 = monitored_session.Scaffold( init_op=2, @@ -1374,7 +1374,7 @@ class MonitoredSessionTest(test.TestCase): def test_defaults(self): with ops.Graph().as_default(): - a_var = variables.Variable(0) + a_var = variables.VariableV1(0) with monitored_session.MonitoredSession() as session: self.assertEqual(0, session.run(a_var)) @@ -1700,7 +1700,7 @@ class MonitoredSessionTest(test.TestCase): def test_graph_finalized_during_run_unfinalized_after_exit(self): with ops.Graph().as_default() as g: - a_var = variables.Variable(0) + a_var = variables.VariableV1(0) with monitored_session.MonitoredSession() as session: self.assertEqual(0, session.run(a_var)) self.assertTrue(g.finalized) @@ -1708,7 +1708,7 @@ class MonitoredSessionTest(test.TestCase): def test_keep_finalized_graph_as_finalized(self): with ops.Graph().as_default() as g: - a_var = variables.Variable(0) + a_var = variables.VariableV1(0) monitored_session.Scaffold().finalize() with monitored_session.MonitoredSession() as session: self.assertEqual(0, session.run(a_var)) @@ -2032,7 +2032,7 @@ class MonitoredSessionTest(test.TestCase): with ops.Graph().as_default(): c = array_ops.placeholder(dtypes.float32) v = array_ops.identity(c) - graph_state = variables.Variable(0.0) + graph_state = 
variables.VariableV1(0.0) graph_side_effect = state_ops.assign_add(graph_state, 0.31) def step_fn(step_context): @@ -2088,7 +2088,7 @@ class MonitoredSessionTest(test.TestCase): c = array_ops.placeholder(dtypes.float32) v = array_ops.identity(c) vv = constant_op.constant(3.2) - graph_state = variables.Variable(0.0) + graph_state = variables.VariableV1(0.0) graph_side_effect = state_ops.assign_add(graph_state, 0.31) class Hook(session_run_hook.SessionRunHook): @@ -2125,7 +2125,7 @@ class SingularMonitoredSessionTest(test.TestCase): def test_handles_initialization(self): with ops.Graph().as_default(): - a_var = variables.Variable(0) + a_var = variables.VariableV1(0) with monitored_session.SingularMonitoredSession() as session: # If it's not initialized, following statement raises an error. self.assertEqual(0, session.run(a_var)) diff --git a/tensorflow/python/training/quantize_training_test.py b/tensorflow/python/training/quantize_training_test.py index 9754adea85..6edbf7665f 100644 --- a/tensorflow/python/training/quantize_training_test.py +++ b/tensorflow/python/training/quantize_training_test.py @@ -58,7 +58,8 @@ class PywrapQuantizeTrainingTest(test.TestCase): g = ops.Graph() with session.Session(graph=g) as sess: a = constant_op.constant(6.0, shape=[1, 1], name='a') - b = variables.Variable(constant_op.constant(7.0, shape=[1, 1]), name='b') + b = variables.VariableV1( + constant_op.constant(7.0, shape=[1, 1]), name='b') c = math_ops.matmul(a, b, name='matmul') init_op = variables.global_variables_initializer() diff --git a/tensorflow/python/training/queue_runner_test.py b/tensorflow/python/training/queue_runner_test.py index 9b9e28af2b..15fe42bbd8 100644 --- a/tensorflow/python/training/queue_runner_test.py +++ b/tensorflow/python/training/queue_runner_test.py @@ -44,7 +44,7 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: # CountUpTo will raise OUT_OF_RANGE when it reaches the count. 
zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) variables.global_variables_initializer().run() @@ -64,9 +64,9 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: # CountUpTo will raise OUT_OF_RANGE when it reaches the count. zero64 = constant_op.constant(0, dtype=dtypes.int64) - var0 = variables.Variable(zero64) + var0 = variables.VariableV1(zero64) count_up_to_3 = var0.count_up_to(3) - var1 = variables.Variable(zero64) + var1 = variables.VariableV1(zero64) count_up_to_30 = var1.count_up_to(30) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) qr = queue_runner_impl.QueueRunner(queue, [count_up_to_3, count_up_to_30]) @@ -131,7 +131,7 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: # CountUpTo will raise OUT_OF_RANGE when it reaches the count. zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) variables.global_variables_initializer().run() @@ -184,7 +184,7 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: with session.Session() as other_sess: zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) variables.global_variables_initializer().run() @@ -199,7 +199,7 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: # CountUpTo will raise OUT_OF_RANGE when it reaches the count. 
zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) variables.global_variables_initializer().run() @@ -215,7 +215,7 @@ class QueueRunnerTest(test.TestCase): with self.cached_session() as sess: # CountUpTo will raise OUT_OF_RANGE when it reaches the count. zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) variables.global_variables_initializer().run() @@ -250,7 +250,7 @@ class QueueRunnerTest(test.TestCase): def testStartQueueRunners(self): # CountUpTo will raise OUT_OF_RANGE when it reaches the count. zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) init_op = variables.global_variables_initializer() @@ -267,7 +267,7 @@ class QueueRunnerTest(test.TestCase): def testStartQueueRunnersRaisesIfNotASession(self): zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) init_op = variables.global_variables_initializer() @@ -280,7 +280,7 @@ class QueueRunnerTest(test.TestCase): def testStartQueueRunnersIgnoresMonitoredSession(self): zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) init_op = variables.global_variables_initializer() @@ -297,7 +297,7 @@ class QueueRunnerTest(test.TestCase): graph = ops.Graph() with graph.as_default(): zero64 = constant_op.constant(0, dtype=dtypes.int64) - var = 
variables.Variable(zero64) + var = variables.VariableV1(zero64) count_up_to = var.count_up_to(3) queue = data_flow_ops.FIFOQueue(10, dtypes.float32) init_op = variables.global_variables_initializer() diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 69b1055ebe..49e6e6546d 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -311,8 +311,8 @@ class SaverTest(test.TestCase): # Build a graph with 2 parameter nodes, and Save and # Restore nodes for them. - v0 = variables.Variable(10.0, name="v0") - v1 = variables.Variable(20.0, name="v1") + v0 = variables.VariableV1(10.0, name="v0") + v1 = variables.VariableV1(20.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") v2_init = v2.insert("k1", 30.0) save = saver_module.Saver( @@ -350,8 +350,8 @@ class SaverTest(test.TestCase): # Start a second session. In that session the parameter nodes # have not been initialized either. with self.cached_session() as sess: - v0 = variables.Variable(-1.0, name="v0") - v1 = variables.Variable(-1.0, name="v1") + v0 = variables.VariableV1(-1.0, name="v0") + v1 = variables.VariableV1(-1.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") save = saver_module.Saver({"v0": v0, "v1": v1, "v2": v2.saveable}) @@ -370,7 +370,7 @@ class SaverTest(test.TestCase): self.assertEqual(30.0, v2.values().eval()) def testFilenameTensor(self): - v0 = variables.Variable(0, name="v0") + v0 = variables.VariableV1(0, name="v0") filename = b"somerandomfilename" save = saver_module.Saver({"v0": v0}, filename=filename) with self.cached_session() as sess: @@ -379,7 +379,7 @@ class SaverTest(test.TestCase): self.assertEqual(sess.run(tensor), filename) def testInvalidPath(self): - v0 = variables.Variable(0, name="v0") + v0 = variables.VariableV1(0, name="v0") for ver in (saver_pb2.SaverDef.V1, saver_pb2.SaverDef.V2): with self.cached_session() as sess: save = saver_module.Saver({"v0": v0}, 
write_version=ver) @@ -392,7 +392,7 @@ class SaverTest(test.TestCase): with self.cached_session() as sess: # Build a graph with 1 node, and save and restore for them. - v = variables.Variable(np.int64(15), name="v") + v = variables.VariableV1(np.int64(15), name="v") save = saver_module.Saver({"v": v}, restore_sequentially=True) variables.global_variables_initializer().run() @@ -402,7 +402,7 @@ class SaverTest(test.TestCase): self.assertEqual(save_path, val) with self.cached_session() as sess: - v = variables.Variable(np.int64(-1), name="v") + v = variables.VariableV1(np.int64(-1), name="v") save = saver_module.Saver({"v": v}) with self.assertRaisesWithPredicateMatch( @@ -416,9 +416,9 @@ class SaverTest(test.TestCase): def testSomeErrors(self): with ops_lib.Graph().as_default(): - v0 = variables.Variable([10.0], name="v0") - v1 = variables.Variable([20.0], name="v1") - v2 = variables.Variable([20.0], name="v2") + v0 = variables.VariableV1([10.0], name="v0") + v1 = variables.VariableV1([20.0], name="v1") + v2 = variables.VariableV1([20.0], name="v2") v2._set_save_slice_info( variables.Variable.SaveSliceInfo("v1", [1], [0], [1])) @@ -446,7 +446,7 @@ class SaverTest(test.TestCase): def testSameName(self): with ops_lib.Graph().as_default(): - v0 = variables.Variable([10.0], name="v0") + v0 = variables.VariableV1([10.0], name="v0") v2 = saver_test_utils.CheckpointedOp(name="v2") # Saving one variable under two names raises an error. @@ -468,8 +468,8 @@ class SaverTest(test.TestCase): with self.session(graph=ops_lib.Graph()) as sess: # Build a graph with 2 parameter nodes, and Save and # Restore nodes for them. 
- v0 = variables.Variable(10.0, name="v0") - v1 = variables.Variable(20.0, name="v1") + v0 = variables.VariableV1(10.0, name="v0") + v1 = variables.VariableV1(20.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") v2_init = v2.insert("k1", 30.0) save = saver_module.Saver([v0, v1, v2.saveable]) @@ -490,8 +490,8 @@ class SaverTest(test.TestCase): # Start a second session. In that session the variables # have not been initialized either. with self.session(graph=ops_lib.Graph()) as sess: - v0 = variables.Variable(-1.0, name="v0") - v1 = variables.Variable(-1.0, name="v1") + v0 = variables.VariableV1(-1.0, name="v0") + v1 = variables.VariableV1(-1.0, name="v1") v2 = saver_test_utils.CheckpointedOp(name="v2") save = saver_module.Saver([v0, v1, v2.saveable]) @@ -515,8 +515,8 @@ class SaverTest(test.TestCase): # Build another graph with 2 nodes, initialized # differently, and a Restore node for them. with self.session(graph=ops_lib.Graph()) as sess: - v0_2 = variables.Variable(1000.0, name="v0") - v1_2 = variables.Variable(2000.0, name="v1") + v0_2 = variables.VariableV1(1000.0, name="v0") + v1_2 = variables.VariableV1(2000.0, name="v1") v2_2 = saver_test_utils.CheckpointedOp(name="v2") save2 = saver_module.Saver([v0_2, v1_2, v2_2.saveable]) v2_2.insert("k1000", 3000.0).run() @@ -574,14 +574,14 @@ class SaverTest(test.TestCase): save_path = os.path.join(self.get_temp_dir(), "gpu") with session.Session("", graph=ops_lib.Graph()) as sess: with sess.graph.device(test.gpu_device_name()): - v0_1 = variables.Variable(123.45) + v0_1 = variables.VariableV1(123.45) save = saver_module.Saver({"v0": v0_1}) variables.global_variables_initializer().run() save.save(sess, save_path) with session.Session("", graph=ops_lib.Graph()) as sess: with sess.graph.device(test.gpu_device_name()): - v0_2 = variables.Variable(543.21) + v0_2 = variables.VariableV1(543.21) save = saver_module.Saver({"v0": v0_2}) variables.global_variables_initializer().run() @@ -591,22 +591,22 @@ class 
SaverTest(test.TestCase): save_path = os.path.join(self.get_temp_dir(), "gpu") with session.Session("", graph=ops_lib.Graph()) as sess: with sess.graph.device(test.gpu_device_name()): - v0_1 = variables.Variable(123.45) + v0_1 = variables.VariableV1(123.45) save = saver_module.Saver({"v0": v0_1}, sharded=True, allow_empty=True) variables.global_variables_initializer().run() save.save(sess, save_path) with session.Session("", graph=ops_lib.Graph()) as sess: with sess.graph.device(test.gpu_device_name()): - v0_2 = variables.Variable(543.21) + v0_2 = variables.VariableV1(543.21) save = saver_module.Saver({"v0": v0_2}, sharded=True, allow_empty=True) variables.global_variables_initializer().run() def testVariables(self): save_path = os.path.join(self.get_temp_dir(), "variables") with session.Session("", graph=ops_lib.Graph()) as sess: - one = variables.Variable(1.0) - twos = variables.Variable([2.0, 2.0, 2.0]) + one = variables.VariableV1(1.0) + twos = variables.VariableV1([2.0, 2.0, 2.0]) v2 = saver_test_utils.CheckpointedOp(name="v2") init = variables.global_variables_initializer() save = saver_module.Saver() @@ -615,8 +615,8 @@ class SaverTest(test.TestCase): save.save(sess, save_path) with session.Session("", graph=ops_lib.Graph()) as sess: - one = variables.Variable(0.0) - twos = variables.Variable([0.0, 0.0, 0.0]) + one = variables.VariableV1(0.0) + twos = variables.VariableV1([0.0, 0.0, 0.0]) v2 = saver_test_utils.CheckpointedOp(name="v2") # Saver with no arg, defaults to 'all variables'. 
save = saver_module.Saver() @@ -628,14 +628,14 @@ class SaverTest(test.TestCase): def testVarListShouldBeEmptyInDeferredBuild(self): with ops_lib.Graph().as_default(): - v = variables.Variable(1.0) + v = variables.VariableV1(1.0) with self.assertRaisesRegexp(ValueError, "defer_build"): saver_module.Saver([v], defer_build=True) def testBuildShouldBeCalledBeforeSaveInCaseOfDeferBuild(self): save_path = os.path.join(self.get_temp_dir(), "error_deferred_build") with ops_lib.Graph().as_default(), session.Session() as sess: - variables.Variable(1.0) + variables.VariableV1(1.0) saver = saver_module.Saver(defer_build=True) with self.assertRaisesRegexp(RuntimeError, "build"): saver.save(sess, save_path) @@ -643,18 +643,18 @@ class SaverTest(test.TestCase): def testDeferredBuild(self): save_path = os.path.join(self.get_temp_dir(), "deferred_build") with session.Session("", graph=ops_lib.Graph()) as sess: - one = variables.Variable(1.0) + one = variables.VariableV1(1.0) save = saver_module.Saver(defer_build=True) # if build is not deferred, saver cannot save the `twos`. - twos = variables.Variable([2.0, 2.0, 2.0]) + twos = variables.VariableV1([2.0, 2.0, 2.0]) init = variables.global_variables_initializer() save.build() init.run() save.save(sess, save_path) with session.Session("", graph=ops_lib.Graph()) as sess: - one = variables.Variable(0.0) - twos = variables.Variable([0.0, 0.0, 0.0]) + one = variables.VariableV1(0.0) + twos = variables.VariableV1([0.0, 0.0, 0.0]) # Saver with no arg, defaults to 'all variables'. 
save = saver_module.Saver() save.restore(sess, save_path) @@ -664,7 +664,7 @@ class SaverTest(test.TestCase): def testReshape(self): save_path = os.path.join(self.get_temp_dir(), "variables_reshape") with session.Session("", graph=ops_lib.Graph()) as sess: - var = variables.Variable([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + var = variables.VariableV1([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) init = variables.global_variables_initializer() save = saver_module.Saver() init.run() @@ -672,7 +672,7 @@ class SaverTest(test.TestCase): # Error when restoring with default reshape=False with session.Session("", graph=ops_lib.Graph()) as sess: - var = variables.Variable([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) + var = variables.VariableV1([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) save = saver_module.Saver() with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, @@ -681,7 +681,7 @@ class SaverTest(test.TestCase): # Restored to new shape with reshape=True with session.Session("", graph=ops_lib.Graph()) as sess: - var = variables.Variable([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) + var = variables.VariableV1([[0.0, 0.0], [0.0, 0.0], [0.0, 0.0]]) save = saver_module.Saver(reshape=True) save.restore(sess, save_path) self.assertAllClose([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], var.eval()) @@ -731,8 +731,8 @@ class SaverTest(test.TestCase): for save_path in paths: # Build a graph with 2 parameter nodes, and Save and # Restore nodes for them. - v0 = variables.Variable(10.0, name="v0") - v1 = variables.Variable(20.0, name="v1") + v0 = variables.VariableV1(10.0, name="v0") + v1 = variables.VariableV1(20.0, name="v1") save = saver_module.Saver({"v0": v0, "v1": v1}, restore_sequentially=True) init_all_op = variables.global_variables_initializer() @@ -770,8 +770,8 @@ class SaverTest(test.TestCase): # Build a graph with 2 parameter nodes, and Save and # Restore nodes for them. 
- v0 = variables.Variable(10.0, name="v0") - v1 = variables.Variable(20.0, name="v1") + v0 = variables.VariableV1(10.0, name="v0") + v1 = variables.VariableV1(20.0, name="v1") save = saver_module.Saver({"v0": v0, "v1": v1}, restore_sequentially=True) init_all_op = variables.global_variables_initializer() @@ -859,10 +859,10 @@ class SaveRestoreShardedTest(test.TestCase): target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v0 = variables.Variable(10, name="v0") + v0 = variables.VariableV1(10, name="v0") t0 = saver_test_utils.CheckpointedOp(name="t0") with sess.graph.device("/cpu:1"): - v1 = variables.Variable(20, name="v1") + v1 = variables.VariableV1(20, name="v1") t1 = saver_test_utils.CheckpointedOp(name="t1") save = saver_module.Saver( { @@ -890,7 +890,7 @@ class SaveRestoreShardedTest(test.TestCase): target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v0 = variables.Variable(111, name="v0") + v0 = variables.VariableV1(111, name="v0") t0 = saver_test_utils.CheckpointedOp(name="t0") save = saver_module.Saver( { @@ -914,7 +914,7 @@ class SaveRestoreShardedTest(test.TestCase): target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v1 = variables.Variable(222) + v1 = variables.VariableV1(222) t1 = saver_test_utils.CheckpointedOp(name="t1") save = saver_module.Saver( { @@ -938,10 +938,10 @@ class SaveRestoreShardedTest(test.TestCase): target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v0 = variables.Variable(111, name="v0") + v0 = variables.VariableV1(111, name="v0") t0 = saver_test_utils.CheckpointedOp(name="t0") with sess.graph.device("/cpu:1"): - v1 = variables.Variable(222, name="v1") + v1 = variables.VariableV1(222, name="v1") t1 = saver_test_utils.CheckpointedOp(name="t1") save = saver_module.Saver( { @@ -984,7 +984,7 @@ class 
SaveRestoreShardedTest(test.TestCase): def testSaverDef(self): with self.cached_session(): - v0 = variables.Variable(123, name="v0") + v0 = variables.VariableV1(123, name="v0") save = saver_module.Saver({"v0": v0}, sharded=True) sd = save.as_saver_def() self.assertTrue(sd.sharded) @@ -1023,7 +1023,7 @@ class SaveRestoreShardedTest(test.TestCase): if use_resource: vs = [resource_variable_ops.ResourceVariable(rnd, name=var_name)] else: - vs = [variables.Variable(rnd, name=var_name)] + vs = [variables.VariableV1(rnd, name=var_name)] variables.global_variables_initializer().run() if call_saver_with_dict: @@ -1054,7 +1054,7 @@ class SaveRestoreShardedTest(test.TestCase): ] else: new_vs = [ - variables.Variable( + variables.VariableV1( array_ops.zeros( shape=var_full_shape), # != original contents. name=var_name) @@ -1210,7 +1210,7 @@ class MaxToKeepTest(test.TestCase): save_dir = self._get_test_dir("max_to_keep_non_sharded") with self.cached_session() as sess: - v = variables.Variable(10.0, name="v") + v = variables.VariableV1(10.0, name="v") save = saver_module.Saver({"v": v}, max_to_keep=2) variables.global_variables_initializer().run() self.assertEqual([], save.last_checkpoints) @@ -1389,9 +1389,9 @@ class MaxToKeepTest(test.TestCase): target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})) as sess: with sess.graph.device("/cpu:0"): - v0 = variables.Variable(111, name="v0") + v0 = variables.VariableV1(111, name="v0") with sess.graph.device("/cpu:1"): - v1 = variables.Variable(222, name="v1") + v1 = variables.VariableV1(222, name="v1") save = saver_module.Saver( { "v0": v0, @@ -1448,7 +1448,7 @@ class MaxToKeepTest(test.TestCase): save_dir2 = self._get_test_dir("max_to_keep_0") with self.cached_session() as sess: - v = variables.Variable(10.0, name="v") + v = variables.VariableV1(10.0, name="v") variables.global_variables_initializer().run() # Test max_to_keep being None. 
@@ -1475,7 +1475,7 @@ class MaxToKeepTest(test.TestCase): save_dir = self._get_test_dir("no_meta_graph") with self.cached_session() as sess: - v = variables.Variable(10.0, name="v") + v = variables.VariableV1(10.0, name="v") save = saver_module.Saver({"v": v}) variables.global_variables_initializer().run() @@ -1632,13 +1632,13 @@ class MetaGraphTest(test.TestCase): filename = os.path.join(test_dir, "metafile") with self.cached_session(): # Creates a graph. - v0 = variables.Variable(1.0, name="v0") + v0 = variables.VariableV1(1.0, name="v0") control_flow_ops.cond( math_ops.less(v0, 10), lambda: math_ops.add(v0, 1), lambda: math_ops.subtract(v0, 1)) control_flow_ops.while_loop(lambda i: math_ops.less(i, 10), lambda i: math_ops.add(i, 1), [v0]) - var = variables.Variable(constant_op.constant(0, dtype=dtypes.int64)) + var = variables.VariableV1(constant_op.constant(0, dtype=dtypes.int64)) count_up_to = var.count_up_to(3) input_queue = data_flow_ops.FIFOQueue( 30, dtypes.float32, shared_name="collection_queue") @@ -1687,7 +1687,7 @@ class MetaGraphTest(test.TestCase): def testAddCollectionDefFails(self): with self.cached_session(): # Creates a graph. - v0 = variables.Variable(10.0, name="v0") + v0 = variables.VariableV1(10.0, name="v0") # Creates a saver. save = saver_module.Saver({"v0": v0}) # Generates MetaGraphDef. @@ -1711,8 +1711,8 @@ class MetaGraphTest(test.TestCase): saver1_ckpt = os.path.join(test_dir, "saver1.ckpt") with self.session(graph=ops_lib.Graph()) as sess: # Creates a graph. - v0 = variables.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0") - v1 = variables.Variable(11.0, name="v1") + v0 = variables.VariableV1([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0") + v1 = variables.VariableV1(11.0, name="v1") # Creates 2 savers. 
saver0 = saver_module.Saver({"v0": v0}, name="saver0") saver1 = saver_module.Saver({"v1": v1}, name="saver1") @@ -1788,8 +1788,8 @@ class MetaGraphTest(test.TestCase): saver1_ckpt = os.path.join(test_dir, "saver1.ckpt") with self.session(graph=ops_lib.Graph()) as sess: # Creates a graph. - v0 = variables.Variable([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0") - v1 = variables.Variable(11.0, name="v1") + v0 = variables.VariableV1([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], name="v0") + v1 = variables.VariableV1(11.0, name="v1") # Creates 2 savers. saver0 = saver_module.Saver({"v0": v0}, name="saver0") @@ -1840,7 +1840,7 @@ class MetaGraphTest(test.TestCase): filename = os.path.join(test_dir, "metafile") with self.session(graph=ops_lib.Graph()): # Creates a graph. - variables.Variable(10.0, name="v0") + variables.VariableV1(10.0, name="v0") # Exports the graph as binary format. saver_module.export_meta_graph(filename, as_text=False) with self.session(graph=ops_lib.Graph()): @@ -1871,8 +1871,8 @@ class MetaGraphTest(test.TestCase): test_dir = self._get_test_dir("slice_saver") filename = os.path.join(test_dir, "metafile") with self.cached_session(): - v1 = variables.Variable([20.0], name="v1") - v2 = variables.Variable([20.0], name="v2") + v1 = variables.VariableV1([20.0], name="v1") + v2 = variables.VariableV1([20.0], name="v2") v2._set_save_slice_info( variables.Variable.SaveSliceInfo("v1", [1], [0], [1])) @@ -1899,7 +1899,7 @@ class MetaGraphTest(test.TestCase): # Hidden 1 images = constant_op.constant(1.2, dtypes.float32, shape=[100, 28]) with ops_lib.name_scope("hidden1"): - weights = variables.Variable( + weights = variables.VariableV1( random_ops.truncated_normal( [28, 128], stddev=1.0 / math.sqrt(float(28))), name="weights") @@ -1907,7 +1907,7 @@ class MetaGraphTest(test.TestCase): # the save and restore of control flow context (which doesn't make any # sense here from a machine learning perspective). 
The typical biases is # a simple Variable without the conditions. - biases = variables.Variable( + biases = variables.VariableV1( control_flow_ops.cond( math_ops.less(random.random(), 0.5), lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])), @@ -1915,7 +1915,7 @@ class MetaGraphTest(test.TestCase): hidden1 = nn_ops.relu(math_ops.matmul(images, weights) + biases) # Hidden 2 with ops_lib.name_scope("hidden2"): - weights = variables.Variable( + weights = variables.VariableV1( random_ops.truncated_normal( [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights") @@ -1933,15 +1933,16 @@ class MetaGraphTest(test.TestCase): _, biases = control_flow_ops.while_loop( loop_cond, loop_body, - [constant_op.constant(0), variables.Variable(array_ops.zeros([32]))]) + [constant_op.constant(0), + variables.VariableV1(array_ops.zeros([32]))]) hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights) + biases) # Linear with ops_lib.name_scope("softmax_linear"): - weights = variables.Variable( + weights = variables.VariableV1( random_ops.truncated_normal( [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights") - biases = variables.Variable(array_ops.zeros([10]), name="biases") + biases = variables.VariableV1(array_ops.zeros([10]), name="biases") logits = math_ops.matmul(hidden2, weights) + biases ops_lib.add_to_collection("logits", logits) init_all_op = variables.global_variables_initializer() @@ -2028,7 +2029,7 @@ class MetaGraphTest(test.TestCase): # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - var = variables.Variable(0.0) + var = variables.VariableV1(0.0) var_name = var.name output = graph_fn(var) output_name = output.name @@ -2122,8 +2123,8 @@ class MetaGraphTest(test.TestCase): def testStrippedOpListDef(self): with self.cached_session(): # Creates a graph. 
- v0 = variables.Variable(0.0) - var = variables.Variable(10.0) + v0 = variables.VariableV1(0.0) + var = variables.VariableV1(10.0) math_ops.add(v0, var) @function.Defun(dtypes.float32) @@ -2161,8 +2162,8 @@ class MetaGraphTest(test.TestCase): # With strip_default_attrs enabled, attributes "T" (float32) and "Tout" # (complex64) in the "Complex" op must be removed. with self.cached_session(): - real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real") - imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag") + real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") + imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num}) @@ -2178,8 +2179,8 @@ class MetaGraphTest(test.TestCase): # (complex64) in the "Complex" op must *not* be removed, even if they map # to their defaults. with self.session(graph=ops_lib.Graph()): - real_num = variables.Variable(1.0, dtype=dtypes.float32, name="real") - imag_num = variables.Variable(2.0, dtype=dtypes.float32, name="imag") + real_num = variables.VariableV1(1.0, dtype=dtypes.float32, name="real") + imag_num = variables.VariableV1(2.0, dtype=dtypes.float32, name="imag") math_ops.complex(real_num, imag_num, name="complex") save = saver_module.Saver({"real_num": real_num, "imag_num": imag_num}) @@ -2198,9 +2199,9 @@ class MetaGraphTest(test.TestCase): image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") with session.Session() as sess: - weights = variables.Variable( + weights = variables.VariableV1( random_ops.random_uniform([784, 10]), name="weights") - bias = variables.Variable(array_ops.zeros([10]), name="bias") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits") 
nn_ops.softmax(logit, name="prediction") cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, @@ -2243,7 +2244,7 @@ class MetaGraphTest(test.TestCase): self.assertIsNone(new_saver_1) # Create a variable in graph_2 under scope "my_scope". - variables.Variable(array_ops.zeros([10]), name="my_scope/my_var") + variables.VariableV1(array_ops.zeros([10]), name="my_scope/my_var") sess.run(variables.global_variables_initializer()) # Restore the checkpoint into a different scope "subgraph_2". new_saver_2 = saver_module.import_meta_graph( @@ -2268,9 +2269,9 @@ class MetaGraphTest(test.TestCase): image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") with session.Session() as sess: - weights = variables.Variable( + weights = variables.VariableV1( random_ops.random_uniform([784, 10]), name="weights") - bias = variables.Variable(array_ops.zeros([10]), name="bias") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") logit = nn_ops.relu(math_ops.matmul(image, weights) + bias, name="logits") nn_ops.softmax(logit, name="prediction") cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, @@ -2299,9 +2300,9 @@ class MetaGraphTest(test.TestCase): with ops_lib.device("/job:ps/replica:0/task:0/device:GPU:0"): image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") - weights = variables.Variable( + weights = variables.VariableV1( random_ops.random_uniform([784, 10]), name="weights") - bias = variables.Variable(array_ops.zeros([10]), name="bias") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") logit = nn_ops.relu(math_ops.matmul(image, weights) + bias) nn_ops.softmax(logit, name="prediction") cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, @@ -2332,9 +2333,9 @@ class MetaGraphTest(test.TestCase): with 
ops_lib.device("/job:ps/replica:0/task:0/device:GPU:0"): image = array_ops.placeholder(dtypes.float32, [None, 784], name="image") label = array_ops.placeholder(dtypes.float32, [None, 10], name="label") - weights = variables.Variable( + weights = variables.VariableV1( random_ops.random_uniform([784, 10]), name="weights") - bias = variables.Variable(array_ops.zeros([10]), name="bias") + bias = variables.VariableV1(array_ops.zeros([10]), name="bias") logit = nn_ops.relu(math_ops.matmul(image, weights) + bias) nn_ops.softmax(logit, name="prediction") cost = nn_ops.softmax_cross_entropy_with_logits(labels=label, @@ -2385,9 +2386,9 @@ class CheckpointReaderTest(test.TestCase): def testDebugString(self): # Builds a graph. - v0 = variables.Variable( + v0 = variables.VariableV1( [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0") - v1 = variables.Variable( + v1 = variables.VariableV1( [[[1], [2]], [[3], [4]], [[5], [6]]], dtype=dtypes.float32, name="v1") init_all_op = variables.global_variables_initializer() save = saver_module.Saver( @@ -2444,7 +2445,8 @@ class WriteGraphTest(test.TestCase): def testWriteGraph(self): test_dir = self._get_test_dir("write_graph_dir") - variables.Variable([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0") + variables.VariableV1( + [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0") path = graph_io.write_graph(ops_lib.get_default_graph(), os.path.join(test_dir, "l1"), "graph.pbtxt") truth = os.path.join(test_dir, "l1", "graph.pbtxt") @@ -2453,7 +2455,8 @@ class WriteGraphTest(test.TestCase): def testRecursiveCreate(self): test_dir = self._get_test_dir("deep_dir") - variables.Variable([[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0") + variables.VariableV1( + [[1, 2, 3], [4, 5, 6]], dtype=dtypes.float32, name="v0") path = graph_io.write_graph(ops_lib.get_default_graph().as_graph_def(), os.path.join(test_dir, "l1", "l2", "l3"), "graph.pbtxt") @@ -2477,7 +2480,7 @@ class ScopedGraphTest(test.TestCase): images = 
constant_op.constant( 1.2, dtypes.float32, shape=[100, 28], name="images") with ops_lib.name_scope("hidden1"): - weights1 = variables.Variable( + weights1 = variables.VariableV1( random_ops.truncated_normal( [28, 128], stddev=1.0 / math.sqrt(float(28))), name="weights") @@ -2485,7 +2488,7 @@ class ScopedGraphTest(test.TestCase): # coverage the save and restore of control flow context (which doesn't # make any sense here from a machine learning perspective). The typical # biases is a simple Variable without the conditions. - biases1 = variables.Variable( + biases1 = variables.VariableV1( control_flow_ops.cond( math_ops.less(random.random(), 0.5), lambda: array_ops.ones([128]), lambda: array_ops.zeros([128])), @@ -2494,7 +2497,7 @@ class ScopedGraphTest(test.TestCase): # Hidden 2 with ops_lib.name_scope("hidden2"): - weights2 = variables.Variable( + weights2 = variables.VariableV1( random_ops.truncated_normal( [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights") @@ -2511,16 +2514,16 @@ class ScopedGraphTest(test.TestCase): return it + 1, biases2 _, biases2 = control_flow_ops.while_loop(loop_cond, loop_body, [ - constant_op.constant(0), variables.Variable(array_ops.zeros([32])) + constant_op.constant(0), variables.VariableV1(array_ops.zeros([32])) ]) hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights2) + biases2) # Linear with ops_lib.name_scope("softmax_linear"): - weights3 = variables.Variable( + weights3 = variables.VariableV1( random_ops.truncated_normal( [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights") - biases3 = variables.Variable(array_ops.zeros([10]), name="biases") + biases3 = variables.VariableV1(array_ops.zeros([10]), name="biases") logits = math_ops.matmul(hidden2, weights3) + biases3 ops_lib.add_to_collection("logits", logits) @@ -2566,7 +2569,7 @@ class ScopedGraphTest(test.TestCase): with graph.as_default(): # Hidden 2 with ops_lib.name_scope("hidden2"): - weights = variables.Variable( + weights = variables.VariableV1( 
random_ops.truncated_normal( [128, 32], stddev=1.0 / math.sqrt(float(128))), name="weights") @@ -2583,16 +2586,16 @@ class ScopedGraphTest(test.TestCase): return it + 1, biases _, biases = control_flow_ops.while_loop(loop_cond, loop_body, [ - constant_op.constant(0), variables.Variable(array_ops.zeros([32])) + constant_op.constant(0), variables.VariableV1(array_ops.zeros([32])) ]) hidden2 = nn_ops.relu(math_ops.matmul(hidden1, weights) + biases) # Linear with ops_lib.name_scope("softmax_linear"): - weights = variables.Variable( + weights = variables.VariableV1( random_ops.truncated_normal( [32, 10], stddev=1.0 / math.sqrt(float(32))), name="weights") - biases = variables.Variable(array_ops.zeros([10]), name="biases") + biases = variables.VariableV1(array_ops.zeros([10]), name="biases") logits = math_ops.matmul(hidden2, weights) + biases ops_lib.add_to_collection("logits", logits) @@ -2629,9 +2632,9 @@ class ScopedGraphTest(test.TestCase): with ops_lib.name_scope("hidden1"): images = constant_op.constant( 1.0, dtypes.float32, shape=[3, 2], name="images") - weights1 = variables.Variable( + weights1 = variables.VariableV1( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights") - biases1 = variables.Variable([0.1] * 3, name="biases") + biases1 = variables.VariableV1([0.1] * 3, name="biases") nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu") # Run the graph and save scoped checkpoint. @@ -2685,9 +2688,9 @@ class ScopedGraphTest(test.TestCase): with ops_lib.name_scope("hidden1"): images = constant_op.constant( 1.0, dtypes.float32, shape=[3, 2], name="images") - weights1 = variables.Variable( + weights1 = variables.VariableV1( [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], name="weights") - biases1 = variables.Variable([0.1] * 3, name="biases") + biases1 = variables.VariableV1([0.1] * 3, name="biases") nn_ops.relu(math_ops.matmul(images, weights1) + biases1, name="relu") # Run the graph and save scoped checkpoint. 
@@ -2720,12 +2723,12 @@ class ScopedGraphTest(test.TestCase): graph = ops_lib.Graph() with graph.as_default(): with ops_lib.name_scope("hidden1"): - variable1 = variables.Variable([1.0], name="variable1") + variable1 = variables.VariableV1([1.0], name="variable1") saver1 = saver_module.Saver(var_list=[variable1]) graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver1) with ops_lib.name_scope("hidden2"): - variable2 = variables.Variable([2.0], name="variable2") + variable2 = variables.VariableV1([2.0], name="variable2") saver2 = saver_module.Saver(var_list=[variable2], name="hidden2/") graph.add_to_collection(ops_lib.GraphKeys.SAVERS, saver2) @@ -2978,7 +2981,7 @@ class CheckpointableCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with ops_lib.Graph().as_default() as g: - a = variables.Variable(1., name="a") + a = variables.VariableV1(1., name="a") a_saver = saver_module.Saver([a]) with self.session(graph=g) as sess: @@ -2986,7 +2989,7 @@ class CheckpointableCompatibilityTests(test.TestCase): save_path = a_saver.save(sess=sess, save_path=checkpoint_prefix) with ops_lib.Graph().as_default() as g: - a = variables.Variable([1.], name="a") + a = variables.VariableV1([1.], name="a") a_saver = saver_module.Saver([a]) with self.session(graph=g) as sess: with self.assertRaisesRegexp( diff --git a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py index c7e84e9ba1..5aa7f45c2b 100644 --- a/tensorflow/python/training/server_lib_same_variables_no_clear_test.py +++ b/tensorflow/python/training/server_lib_same_variables_no_clear_test.py @@ -37,8 +37,8 @@ class SameVariablesNoClearTest(test.TestCase): server = server_lib.Server.create_local_server() with session.Session(server.target) as sess_1: - v0 = variables.Variable([[2, 1]], name="v0") - v1 = variables.Variable([[1], [2]], name="v1") + v0 = variables.VariableV1([[2, 1]], name="v0") + 
v1 = variables.VariableV1([[1], [2]], name="v1") v2 = math_ops.matmul(v0, v1) sess_1.run([v0.initializer, v1.initializer]) self.assertAllEqual([[4]], sess_1.run(v2)) diff --git a/tensorflow/python/training/server_lib_test.py b/tensorflow/python/training/server_lib_test.py index 063044f0d0..cf995707fc 100644 --- a/tensorflow/python/training/server_lib_test.py +++ b/tensorflow/python/training/server_lib_test.py @@ -76,9 +76,9 @@ class GrpcServerTest(test.TestCase): def testResetFails(self): # Creates variable with container name. with ops.container("test0"): - v0 = variables.Variable(1.0, name="v0") + v0 = variables.VariableV1(1.0, name="v0") # Creates variable with default container. - v1 = variables.Variable(2.0, name="v1") + v1 = variables.VariableV1(2.0, name="v1") # Verifies resetting the non-existent target returns error. with self.assertRaises(errors_impl.NotFoundError): session.Session.reset("nonexistent", ["test0"]) @@ -234,8 +234,8 @@ class GrpcServerTest(test.TestCase): [0.], dtype=dtypes.float32)) self.assertIsNotNone(input_queue) - var = variables.Variable(1., dtype=dtypes.float32, trainable=False, - name="var") + var = variables.VariableV1(1., dtype=dtypes.float32, trainable=False, + name="var") sess.run(variables.global_variables_initializer()) queue_runner_impl.start_queue_runners(sess) @@ -245,7 +245,7 @@ class GrpcServerTest(test.TestCase): server = self._cached_server init_value = array_ops.placeholder(dtypes.int32) - v = variables.Variable(init_value, validate_shape=False, name="v") + v = variables.VariableV1(init_value, validate_shape=False, name="v") sharing_config = config_pb2.ConfigProto(isolate_session_state=False) sharing_sess_0 = session.Session(server.target, config=sharing_config) @@ -302,7 +302,7 @@ class GrpcServerTest(test.TestCase): isolate_config = config_pb2.ConfigProto(isolate_session_state=True) with ops.Graph().as_default(): - w_vector = variables.Variable([1, 2, 3], name="w") + w_vector = variables.VariableV1([1, 2, 3], 
name="w") with session.Session(server.target, config=sharing_config) as sess: with self.assertRaises(errors_impl.FailedPreconditionError): sess.run(w_vector) @@ -310,20 +310,20 @@ class GrpcServerTest(test.TestCase): self.assertAllEqual([1, 2, 3], sess.run(w_vector)) with ops.Graph().as_default(): - w_vector = variables.Variable([4, 5, 6], name="w") + w_vector = variables.VariableV1([4, 5, 6], name="w") with session.Session(server.target, config=sharing_config) as sess: self.assertAllEqual([1, 2, 3], sess.run(w_vector)) sess.run(w_vector.initializer) self.assertAllEqual([4, 5, 6], sess.run(w_vector)) with ops.Graph().as_default(): - w_scalar = variables.Variable(86, name="w") + w_scalar = variables.VariableV1(86, name="w") with session.Session(server.target, config=sharing_config) as sess: with self.assertRaises(errors_impl.InvalidArgumentError): sess.run(w_scalar.initializer) with ops.Graph().as_default(): - w_scalar = variables.Variable(37, name="w") + w_scalar = variables.VariableV1(37, name="w") with session.Session(server.target, config=isolate_config) as sess: with self.assertRaises(errors_impl.FailedPreconditionError): sess.run(w_scalar) diff --git a/tensorflow/python/training/session_manager_test.py b/tensorflow/python/training/session_manager_test.py index f1d18f7704..2b5c3b01de 100644 --- a/tensorflow/python/training/session_manager_test.py +++ b/tensorflow/python/training/session_manager_test.py @@ -40,7 +40,7 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionSucceeds(self): with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0], name="v") + v = variables.VariableV1([1.0, 2.0, 3.0], name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) sess = sm.prepare_session( @@ -50,7 +50,7 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionSucceedsWithInitFeedDict(self): with ops.Graph().as_default(): p = array_ops.placeholder(dtypes.float32, shape=(3,)) - v = 
variables.Variable(p, name="v") + v = variables.VariableV1(p, name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) sess = sm.prepare_session( @@ -61,7 +61,7 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionSucceedsWithInitFn(self): with ops.Graph().as_default(): - v = variables.Variable([125], name="v") + v = variables.VariableV1([125], name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) sess = sm.prepare_session( @@ -79,7 +79,7 @@ class SessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0], name="v") + v = variables.VariableV1([1.0, 2.0, 3.0], name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) saver = saver_lib.Saver({"v": v}) @@ -97,7 +97,7 @@ class SessionManagerTest(test.TestCase): # Renames the checkpoint directory. os.rename(checkpoint_dir, checkpoint_dir2) gfile.MakeDirs(checkpoint_dir) - v = variables.Variable([6.0, 7.0, 8.0], name="v") + v = variables.VariableV1([6.0, 7.0, 8.0], name="v") with self.cached_session(): self.assertEqual(False, variables.is_variable_initialized(v).eval()) session_manager.SessionManager( @@ -134,7 +134,7 @@ class SessionManagerTest(test.TestCase): checkpoint_filename_with_path=None): # Create a new Graph and SessionManager and recover from a checkpoint. 
with ops.Graph().as_default(): - v = variables.Variable(2, name="v") + v = variables.VariableV1(2, name="v") with session_lib.Session(): self.assertEqual(False, variables.is_variable_initialized(v).eval()) sm2 = session_manager.SessionManager( @@ -162,7 +162,7 @@ class SessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable(1, name="v") + v = variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) saver = saver_lib.Saver({"v": v}) @@ -186,7 +186,7 @@ class SessionManagerTest(test.TestCase): def testWaitForSessionReturnsNoneAfterTimeout(self): with ops.Graph().as_default(): - variables.Variable(1, name="v") + variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables(), recovery_wait_secs=1) @@ -217,7 +217,7 @@ class SessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable(1, name="v") + v = variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) saver = saver_lib.Saver({"v": v}) @@ -230,8 +230,8 @@ class SessionManagerTest(test.TestCase): os.path.join(checkpoint_dir, "recover_session_checkpoint")) # Create a new Graph and SessionManager and recover. 
with ops.Graph().as_default(): - v = variables.Variable(2, name="v") - w = variables.Variable( + v = variables.VariableV1(2, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -275,7 +275,7 @@ class SessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable(1, name="v") + v = variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.report_uninitialized_variables()) saver = saver_lib.Saver({"v": v}) @@ -288,8 +288,8 @@ class SessionManagerTest(test.TestCase): os.path.join(checkpoint_dir, "recover_session_checkpoint")) # Create a new Graph and SessionManager and recover. with ops.Graph().as_default(): - v = variables.Variable(2, name="v") - w = variables.Variable( + v = variables.VariableV1(2, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -321,7 +321,7 @@ class SessionManagerTest(test.TestCase): # local_init_op exactly once, regardless of whether the session was # successfully recovered. with ops.Graph().as_default(): - w = variables.Variable( + w = variables.VariableV1( 1, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -356,8 +356,8 @@ class SessionManagerTest(test.TestCase): # Create a new Graph and SessionManager and recover. 
with ops.Graph().as_default(): - v = variables.Variable(2, name="v") - w = variables.Variable( + v = variables.VariableV1(2, name="v") + w = variables.VariableV1( 1, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -389,8 +389,8 @@ class SessionManagerTest(test.TestCase): def testWaitForSessionLocalInit(self): server = server_lib.Server.create_local_server() with ops.Graph().as_default() as graph: - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -420,8 +420,8 @@ class SessionManagerTest(test.TestCase): def testWaitForSessionWithReadyForLocalInitOpFailsToReadyLocal(self): with ops.Graph().as_default() as graph: - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -439,8 +439,8 @@ class SessionManagerTest(test.TestCase): def testWaitForSessionInsufficientReadyForLocalInitCheck(self): with ops.Graph().as_default() as graph: - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -456,13 +456,13 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionWithReadyForLocalInitOp(self): with ops.Graph().as_default(): - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], name="w") - x = variables.Variable( + x = variables.VariableV1( 3 * v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -495,25 +495,25 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionWithPartialInitOp(self): with ops.Graph().as_default(): - v = variables.Variable(1, name="v") 
- w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], name="w") - x = variables.Variable( + x = variables.VariableV1( 3 * v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], name="x") # TODO(b/70206927): Use ResourceVariables once they are handled properly. - v_res = variables.Variable(1, name="v_res") - w_res = variables.Variable( + v_res = variables.VariableV1(1, name="v_res") + w_res = variables.VariableV1( v_res, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], name="w_res") - x_res = variables.Variable( + x_res = variables.VariableV1( 3 * v_res, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -565,7 +565,7 @@ class SessionManagerTest(test.TestCase): # cyclic dependencies. with ops.Graph().as_default(): i = control_flow_ops.while_loop(lambda i: i < 1, lambda i: i + 1, [0]) - v = variables.Variable(array_ops.identity(i), name="v") + v = variables.VariableV1(array_ops.identity(i), name="v") with self.cached_session(): self.assertEqual(False, variables.is_variable_initialized(v).eval()) sm = session_manager.SessionManager( @@ -579,8 +579,8 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionDidNotInitLocalVariable(self): with ops.Graph().as_default(): - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -596,8 +596,8 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionDidNotInitLocalVariableList(self): with ops.Graph().as_default(): - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -613,8 +613,8 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionWithReadyNotReadyForLocal(self): with 
ops.Graph().as_default(): - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -634,8 +634,8 @@ class SessionManagerTest(test.TestCase): def testPrepareSessionWithInsufficientReadyForLocalInitCheck(self): with ops.Graph().as_default(): - v = variables.Variable(1, name="v") - w = variables.Variable( + v = variables.VariableV1(1, name="v") + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -656,7 +656,7 @@ class ObsoleteSessionManagerTest(test.TestCase): def testPrepareSessionSucceeds(self): with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0], name="v") + v = variables.VariableV1([1.0, 2.0, 3.0], name="v") sm = session_manager.SessionManager( ready_op=variables.assert_variables_initialized()) sess = sm.prepare_session( @@ -666,7 +666,7 @@ class ObsoleteSessionManagerTest(test.TestCase): def testPrepareSessionSucceedsWithInitFeedDict(self): with ops.Graph().as_default(): p = array_ops.placeholder(dtypes.float32, shape=(3,)) - v = variables.Variable(p, name="v") + v = variables.VariableV1(p, name="v") sm = session_manager.SessionManager( ready_op=variables.assert_variables_initialized()) sess = sm.prepare_session( @@ -677,7 +677,7 @@ class ObsoleteSessionManagerTest(test.TestCase): def testPrepareSessionSucceedsWithInitFn(self): with ops.Graph().as_default(): - v = variables.Variable([125], name="v") + v = variables.VariableV1([125], name="v") sm = session_manager.SessionManager( ready_op=variables.assert_variables_initialized()) sess = sm.prepare_session( @@ -695,7 +695,7 @@ class ObsoleteSessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0], name="v") + v = variables.VariableV1([1.0, 2.0, 3.0], name="v") sm = session_manager.SessionManager( 
ready_op=variables.assert_variables_initialized()) saver = saver_lib.Saver({"v": v}) @@ -713,7 +713,7 @@ class ObsoleteSessionManagerTest(test.TestCase): # Renames the checkpoint directory. os.rename(checkpoint_dir, checkpoint_dir2) gfile.MakeDirs(checkpoint_dir) - v = variables.Variable([6.0, 7.0, 8.0], name="v") + v = variables.VariableV1([6.0, 7.0, 8.0], name="v") with self.cached_session(): self.assertEqual(False, variables.is_variable_initialized(v).eval()) session_manager.SessionManager( @@ -755,7 +755,7 @@ class ObsoleteSessionManagerTest(test.TestCase): gfile.MakeDirs(checkpoint_dir) with ops.Graph().as_default(): - v = variables.Variable(1, name="v") + v = variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.assert_variables_initialized()) saver = saver_lib.Saver({"v": v}) @@ -768,7 +768,7 @@ class ObsoleteSessionManagerTest(test.TestCase): os.path.join(checkpoint_dir, "recover_session_checkpoint")) # Create a new Graph and SessionManager and recover. 
with ops.Graph().as_default(): - v = variables.Variable(2, name="v") + v = variables.VariableV1(2, name="v") with self.cached_session(): self.assertEqual(False, variables.is_variable_initialized(v).eval()) sm2 = session_manager.SessionManager( @@ -785,7 +785,7 @@ class ObsoleteSessionManagerTest(test.TestCase): def testWaitForSessionReturnsNoneAfterTimeout(self): with ops.Graph().as_default(): - variables.Variable(1, name="v") + variables.VariableV1(1, name="v") sm = session_manager.SessionManager( ready_op=variables.assert_variables_initialized(), recovery_wait_secs=1) diff --git a/tensorflow/python/training/supervisor_test.py b/tensorflow/python/training/supervisor_test.py index caf6eba3e0..7cd99d8680 100644 --- a/tensorflow/python/training/supervisor_test.py +++ b/tensorflow/python/training/supervisor_test.py @@ -423,7 +423,7 @@ class SupervisorTest(test.TestCase): def testLogdirButExplicitlyNoSummaryWriter(self): logdir = self._test_dir("explicit_no_summary_writer") with ops.Graph().as_default(): - variables.Variable([1.0], name="foo") + variables.VariableV1([1.0], name="foo") summary.scalar("c1", constant_op.constant(1)) summary.scalar("c2", constant_op.constant(2)) summary.scalar("c3", constant_op.constant(3)) @@ -491,7 +491,7 @@ class SupervisorTest(test.TestCase): def testNoLogdirSucceeds(self): with ops.Graph().as_default(): - variables.Variable([1.0, 2.0, 3.0]) + variables.VariableV1([1.0, 2.0, 3.0]) sv = supervisor.Supervisor(logdir="", summary_op=None) sess = sv.prepare_or_wait_for_session("") sess.close() @@ -499,7 +499,7 @@ class SupervisorTest(test.TestCase): def testUseSessionManager(self): with ops.Graph().as_default(): - variables.Variable([1.0, 2.0, 3.0]) + variables.VariableV1([1.0, 2.0, 3.0]) sm = session_manager_lib.SessionManager() # Pass in session_manager. The additional init_op is ignored. 
sv = supervisor.Supervisor(logdir="", session_manager=sm) @@ -508,7 +508,7 @@ class SupervisorTest(test.TestCase): def testInitOp(self): logdir = self._test_dir("default_init_op") with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0]) + v = variables.VariableV1([1.0, 2.0, 3.0]) sv = supervisor.Supervisor(logdir=logdir) sess = sv.prepare_or_wait_for_session("") self.assertAllClose([1.0, 2.0, 3.0], sess.run(v)) @@ -517,7 +517,7 @@ class SupervisorTest(test.TestCase): def testInitFn(self): logdir = self._test_dir("default_init_op") with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0]) + v = variables.VariableV1([1.0, 2.0, 3.0]) def _init_fn(sess): sess.run(v.initializer) @@ -531,7 +531,7 @@ class SupervisorTest(test.TestCase): logdir = self._test_dir("feed_dict_init_op") with ops.Graph().as_default(): p = array_ops.placeholder(dtypes.float32, shape=(3,)) - v = variables.Variable(p, name="v") + v = variables.VariableV1(p, name="v") sv = supervisor.Supervisor( logdir=logdir, init_op=variables.global_variables_initializer(), @@ -550,10 +550,10 @@ class SupervisorTest(test.TestCase): g = ops.Graph() with g.as_default(): with ops.device("/job:local"): - v = variables.Variable( + v = variables.VariableV1( 1, name="default_ready_for_local_init_op_v_" + str(uid)) vadd = v.assign_add(1) - w = variables.Variable( + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -590,7 +590,7 @@ class SupervisorTest(test.TestCase): # Create a checkpoint. 
with ops.Graph().as_default(): - v = variables.Variable( + v = variables.VariableV1( 10.0, name="ready_for_local_init_op_restore_v_" + str(uid)) summary.scalar("ready_for_local_init_op_restore_v_" + str(uid), v) sv = supervisor.Supervisor(logdir=logdir) @@ -607,10 +607,10 @@ class SupervisorTest(test.TestCase): g = ops.Graph() with g.as_default(): with ops.device("/job:local"): - v = variables.Variable( + v = variables.VariableV1( 1.0, name="ready_for_local_init_op_restore_v_" + str(uid)) vadd = v.assign_add(1) - w = variables.Variable( + w = variables.VariableV1( v, trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES], @@ -642,13 +642,13 @@ class SupervisorTest(test.TestCase): logdir = self._test_dir("default_local_init_op") with ops.Graph().as_default(): # A local variable. - v = variables.Variable( + v = variables.VariableV1( [1.0, 2.0, 3.0], trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES]) # An entity which is initialized through a TABLE_INITIALIZER. - w = variables.Variable([4, 5, 6], trainable=False, collections=[]) + w = variables.VariableV1([4, 5, 6], trainable=False, collections=[]) ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, w.initializer) # This shouldn't add a variable to the VARIABLES collection responsible @@ -668,7 +668,7 @@ class SupervisorTest(test.TestCase): with ops.Graph().as_default(): with ops.device("/job:localhost"): # A local variable. - v = variables.Variable( + v = variables.VariableV1( [1.0, 2.0, 3.0], trainable=False, collections=[ops.GraphKeys.LOCAL_VARIABLES]) @@ -687,8 +687,8 @@ class SupervisorTest(test.TestCase): server = server_lib.Server.create_local_server() logdir = self._test_dir("default_init_op_fails") with ops.Graph().as_default(): - v = variables.Variable([1.0, 2.0, 3.0], name="v") - variables.Variable([4.0, 5.0, 6.0], name="w") + v = variables.VariableV1([1.0, 2.0, 3.0], name="v") + variables.VariableV1([4.0, 5.0, 6.0], name="w") # w will not be initialized. 
sv = supervisor.Supervisor(logdir=logdir, init_op=v.initializer) with self.assertRaisesRegexp(RuntimeError, @@ -699,11 +699,11 @@ class SupervisorTest(test.TestCase): server = server_lib.Server.create_local_server() logdir = self._test_dir("default_init_op_fails_for_local_variable") with ops.Graph().as_default(): - v = variables.Variable( + v = variables.VariableV1( [1.0, 2.0, 3.0], name="v", collections=[ops.GraphKeys.LOCAL_VARIABLES]) - variables.Variable( + variables.VariableV1( [1.0, 2.0, 3.0], name="w", collections=[ops.GraphKeys.LOCAL_VARIABLES]) @@ -716,17 +716,17 @@ class SupervisorTest(test.TestCase): def testSetupFail(self): logdir = self._test_dir("setup_fail") with ops.Graph().as_default(): - variables.Variable([1.0, 2.0, 3.0], name="v") + variables.VariableV1([1.0, 2.0, 3.0], name="v") with self.assertRaisesRegexp(ValueError, "must have their device set"): supervisor.Supervisor(logdir=logdir, is_chief=False) with ops.Graph().as_default(), ops.device("/job:ps"): - variables.Variable([1.0, 2.0, 3.0], name="v") + variables.VariableV1([1.0, 2.0, 3.0], name="v") supervisor.Supervisor(logdir=logdir, is_chief=False) def testDefaultGlobalStep(self): logdir = self._test_dir("default_global_step") with ops.Graph().as_default(): - variables.Variable(287, name="global_step") + variables.VariableV1(287, name="global_step") sv = supervisor.Supervisor(logdir=logdir) sess = sv.prepare_or_wait_for_session("") self.assertEquals(287, sess.run(sv.global_step)) @@ -735,7 +735,7 @@ class SupervisorTest(test.TestCase): def testRestoreFromMetaGraph(self): logdir = self._test_dir("restore_from_meta_graph") with ops.Graph().as_default(): - variables.Variable(1, name="v0") + variables.VariableV1(1, name="v0") sv = supervisor.Supervisor(logdir=logdir) sess = sv.prepare_or_wait_for_session("") filename = sv.saver.save(sess, sv.save_path) @@ -757,7 +757,7 @@ class SupervisorTest(test.TestCase): logdir = self._test_dir("standard_services_without_global_step") # Create a checkpoint. 
with ops.Graph().as_default(): - v = variables.Variable([1.0], name="foo") + v = variables.VariableV1([1.0], name="foo") summary.scalar("v", v[0]) sv = supervisor.Supervisor(logdir=logdir) meta_graph_def = meta_graph.create_meta_graph_def( @@ -796,7 +796,7 @@ class SupervisorTest(test.TestCase): self.assertRaises(StopIteration, lambda: next(rr)) # There should be a checkpoint file with the variable "foo" with ops.Graph().as_default(), self.cached_session() as sess: - v = variables.Variable([10.10], name="foo") + v = variables.VariableV1([10.10], name="foo") sav = saver_lib.Saver([v]) sav.restore(sess, save_path) self.assertEqual(1.0, v.eval()[0]) @@ -807,7 +807,7 @@ class SupervisorTest(test.TestCase): logdir = self._test_dir("standard_services_with_global_step") # Create a checkpoint. with ops.Graph().as_default(): - v = variables.Variable([123], name="global_step") + v = variables.VariableV1([123], name="global_step") sv = supervisor.Supervisor(logdir=logdir) meta_graph_def = meta_graph.create_meta_graph_def( saver_def=sv.saver.saver_def) @@ -860,7 +860,7 @@ class SupervisorTest(test.TestCase): self.assertRaises(StopIteration, lambda: next(rr)) # There should be a checkpoint file with the variable "foo" with ops.Graph().as_default(), self.cached_session() as sess: - v = variables.Variable([-12], name="global_step") + v = variables.VariableV1([-12], name="global_step") sav = saver_lib.Saver([v]) sav.restore(sess, save_path) self.assertEqual(123, v.eval()[0]) diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py index fff17402e2..1ef8756ef6 100644 --- a/tensorflow/python/training/sync_replicas_optimizer_test.py +++ b/tensorflow/python/training/sync_replicas_optimizer_test.py @@ -40,11 +40,12 @@ def get_workers(num_workers, replicas_to_aggregate, workers): is_chief = (worker_id == 0) with graph.as_default(): with ops.device("/job:ps/task:0"): - global_step = variables.Variable(0, 
name="global_step", trainable=False) - var_0 = variables.Variable(0.0, name="v0") + global_step = variables.VariableV1( + 0, name="global_step", trainable=False) + var_0 = variables.VariableV1(0.0, name="v0") with ops.device("/job:ps/task:1"): - var_1 = variables.Variable(1.0, name="v1") - var_sparse = variables.Variable([[3.0], [4.0]], name="v_sparse") + var_1 = variables.VariableV1(1.0, name="v1") + var_sparse = variables.VariableV1([[3.0], [4.0]], name="v_sparse") with ops.device("/job:worker/task:" + str(worker_id)): grads_0 = constant_op.constant(0.1 + worker_id * 0.2) @@ -272,8 +273,8 @@ class SyncReplicasOptimizerHookTest(test.TestCase): replicas_to_aggregate=1, total_num_replicas=1) hook = opt.make_session_run_hook(True) - v = variables.Variable([0.]) - global_step = variables.Variable(0, name="global_step", trainable=False) + v = variables.VariableV1([0.]) + global_step = variables.VariableV1(0, name="global_step", trainable=False) opt.minimize(v, global_step=global_step) hook.begin() @@ -282,8 +283,8 @@ class SyncReplicasOptimizerHookTest(test.TestCase): opt=adam.AdamOptimizer(0.01), replicas_to_aggregate=1, total_num_replicas=1) - v = variables.Variable([0.], name="fetch_variable_test") - global_step = variables.Variable(0, name="global_step", trainable=False) + v = variables.VariableV1([0.], name="fetch_variable_test") + global_step = variables.VariableV1(0, name="global_step", trainable=False) opt.minimize(v, global_step=global_step) opt_variables = opt.variables() beta1_power, beta2_power = opt._opt._get_beta_accumulators() diff --git a/tensorflow/python/training/training_ops_test.py b/tensorflow/python/training/training_ops_test.py index d131a11067..f410ceaaff 100644 --- a/tensorflow/python/training/training_ops_test.py +++ b/tensorflow/python/training/training_ops_test.py @@ -51,7 +51,7 @@ class TrainingOpsTest(TensorFlowTestCase): def _testTypes(self, x, alpha, delta, use_gpu=None): self.setUp() with self.test_session(use_gpu=use_gpu): - var = 
variables.Variable(x) + var = variables.VariableV1(x) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType(x, var.eval()) apply_sgd = training_ops.apply_gradient_descent(var, alpha, delta) @@ -70,8 +70,8 @@ class TrainingOpsTest(TensorFlowTestCase): def _testTypesForAdagrad(self, x, y, lr, grad, use_gpu=None): self.setUp() with self.test_session(use_gpu=use_gpu): - var = variables.Variable(x) - accum = variables.Variable(y) + var = variables.VariableV1(x) + accum = variables.VariableV1(y) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType(x, var.eval()) @@ -94,9 +94,9 @@ class TrainingOpsTest(TensorFlowTestCase): lr_power=-0.5): self.setUp() with self.test_session(use_gpu=use_gpu): - var = variables.Variable(x) - accum = variables.Variable(y) - linear = variables.Variable(z) + var = variables.VariableV1(x) + accum = variables.VariableV1(y) + linear = variables.VariableV1(z) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType(x, var.eval()) @@ -148,8 +148,8 @@ class TrainingOpsTest(TensorFlowTestCase): def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices): self.setUp() with self.test_session(use_gpu=False): - var = variables.Variable(x) - accum = variables.Variable(y) + var = variables.VariableV1(x) + accum = variables.VariableV1(y) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType(x, var.eval()) @@ -178,9 +178,9 @@ class TrainingOpsTest(TensorFlowTestCase): lr_power=-0.5): self.setUp() with self.test_session(use_gpu=False): - var = variables.Variable(x) - accum = variables.Variable(y) - linear = variables.Variable(z) + var = variables.VariableV1(x) + accum = variables.VariableV1(y) + linear = variables.VariableV1(z) variables.global_variables_initializer().run() self.assertAllCloseAccordingToType(x, var.eval()) @@ -257,9 +257,9 @@ class TrainingOpsTest(TensorFlowTestCase): def _testTypesForAdam(self, var, m, v, grad, use_gpu): 
self.setUp() with self.test_session(use_gpu=use_gpu): - var_t = variables.Variable(var) - m_t = variables.Variable(m) - v_t = variables.Variable(v) + var_t = variables.VariableV1(var) + m_t = variables.VariableV1(m) + v_t = variables.VariableV1(v) t = 1 beta1 = np.array(0.9, dtype=var.dtype) @@ -270,8 +270,8 @@ class TrainingOpsTest(TensorFlowTestCase): epsilon = np.array(1e-8, dtype=var.dtype) beta1_t = constant_op.constant(beta1, self._toType(var.dtype), []) beta2_t = constant_op.constant(beta2, self._toType(var.dtype), []) - beta1_power_t = variables.Variable(beta1_power) - beta2_power_t = variables.Variable(beta2_power) + beta1_power_t = variables.VariableV1(beta1_power) + beta2_power_t = variables.VariableV1(beta2_power) lr_t = constant_op.constant(lr, self._toType(var.dtype), []) epsilon_t = constant_op.constant(epsilon, self._toType(var.dtype), []) variables.global_variables_initializer().run() diff --git a/tensorflow/python/training/training_util_test.py b/tensorflow/python/training/training_util_test.py index 6cc177e0e8..ba64e785ac 100644 --- a/tensorflow/python/training/training_util_test.py +++ b/tensorflow/python/training/training_util_test.py @@ -49,7 +49,7 @@ class GlobalStepTest(test.TestCase): def test_invalid_shape(self): with ops.Graph().as_default() as g: self.assertIsNone(training_util.get_global_step()) - variables.Variable( + variables.VariableV1( [0], trainable=False, dtype=dtypes.int32, @@ -73,7 +73,7 @@ class GlobalStepTest(test.TestCase): def test_get_global_step(self): with ops.Graph().as_default() as g: self.assertIsNone(training_util.get_global_step()) - variables.Variable( + variables.VariableV1( 0, trainable=False, dtype=dtypes.int32, diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt index 05698b03ee..af7fc9d4ef 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-variable.pbtxt @@ -1,5 
+1,6 @@ path: "tensorflow.Variable" tf_class { + is_instance: "" is_instance: "" is_instance: "" is_instance: "" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt index 503e145a91..509ceff9df 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.pbtxt @@ -2220,6 +2220,10 @@ tf_module { name: "variable_axis_size_partitioner" argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], " } + member_method { + name: "variable_creator_scope" + argspec: "args=[\'variable_creator\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "variable_op_scope" argspec: "args=[\'values\', \'name_or_scope\', \'default_name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt deleted file mode 100644 index c13eb7b8bb..0000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.-variable-scope.pbtxt +++ /dev/null @@ -1,105 +0,0 @@ -path: "tensorflow.VariableScope" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "caching_device" - mtype: "" - } - member { - name: "constraint" - mtype: "" - } - member { - name: "custom_getter" - mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "initializer" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "original_name_scope" - mtype: "" - } - member { - name: "partitioner" - mtype: "" - } - member { - name: "regularizer" - mtype: "" - } - member { - name: "reuse" - mtype: 
"" - } - member { - name: "use_resource" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'reuse\', \'name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'name_scope\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'\', \'None\', \'None\', \'None\', \'None\', \'None\', \'\', \"\", \'None\', \'None\'], " - } - member_method { - name: "get_collection" - argspec: "args=[\'self\', \'name\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "get_variable" - argspec: "args=[\'self\', \'var_store\', \'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'reuse\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " - } - member_method { - name: "global_variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "local_variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "reuse_variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_caching_device" - argspec: "args=[\'self\', \'caching_device\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_custom_getter" - argspec: "args=[\'self\', \'custom_getter\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_dtype" - argspec: "args=[\'self\', \'dtype\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_initializer" - argspec: "args=[\'self\', 
\'initializer\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_partitioner" - argspec: "args=[\'self\', \'partitioner\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_regularizer" - argspec: "args=[\'self\', \'regularizer\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "set_use_resource" - argspec: "args=[\'self\', \'use_resource\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "trainable_variables" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt deleted file mode 100644 index ac3ccd468b..0000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.-save-slice-info.pbtxt +++ /dev/null @@ -1,17 +0,0 @@ -path: "tensorflow.Variable.SaveSliceInfo" -tf_class { - is_instance: "" - is_instance: "" - member { - name: "spec" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'full_name\', \'full_shape\', \'var_offset\', \'var_shape\', \'save_slice_info_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " - } - member_method { - name: "to_proto" - argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt deleted file mode 100644 index 05698b03ee..0000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.-variable.pbtxt +++ /dev/null @@ -1,130 +0,0 @@ -path: "tensorflow.Variable" -tf_class { - is_instance: "" - is_instance: "" - is_instance: "" - member { - name: "SaveSliceInfo" - mtype: "" - } - member { - name: "constraint" - mtype: "" - } - member { - name: "device" - 
mtype: "" - } - member { - name: "dtype" - mtype: "" - } - member { - name: "graph" - mtype: "" - } - member { - name: "initial_value" - mtype: "" - } - member { - name: "initializer" - mtype: "" - } - member { - name: "name" - mtype: "" - } - member { - name: "op" - mtype: "" - } - member { - name: "shape" - mtype: "" - } - member { - name: "trainable" - mtype: "" - } - member_method { - name: "__init__" - argspec: "args=[\'self\', \'initial_value\', \'trainable\', \'collections\', \'validate_shape\', \'caching_device\', \'name\', \'variable_def\', \'dtype\', \'expected_shape\', \'import_scope\', \'constraint\', \'use_resource\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'True\', \'None\', \'True\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " - } - member_method { - name: "assign" - argspec: "args=[\'self\', \'value\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " - } - member_method { - name: "assign_add" - argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " - } - member_method { - name: "assign_sub" - argspec: "args=[\'self\', \'delta\', \'use_locking\', \'name\', \'read_value\'], varargs=None, keywords=None, defaults=[\'False\', \'None\', \'True\'], " - } - member_method { - name: "count_up_to" - argspec: "args=[\'self\', \'limit\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "eval" - argspec: "args=[\'self\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "from_proto" - argspec: "args=[\'variable_def\', \'import_scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "get_shape" - argspec: "args=[\'self\'], varargs=None, 
keywords=None, defaults=None" - } - member_method { - name: "initialized_value" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "load" - argspec: "args=[\'self\', \'value\', \'session\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "read_value" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "scatter_add" - argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " - } - member_method { - name: "scatter_nd_add" - argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "scatter_nd_sub" - argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "scatter_nd_update" - argspec: "args=[\'self\', \'indices\', \'updates\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "scatter_sub" - argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " - } - member_method { - name: "scatter_update" - argspec: "args=[\'self\', \'sparse_delta\', \'use_locking\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " - } - member_method { - name: "set_shape" - argspec: "args=[\'self\', \'shape\'], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "to_proto" - argspec: "args=[\'self\', \'export_scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "value" - argspec: "args=[\'self\'], varargs=None, keywords=None, defaults=None" - } -} diff --git a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt index 
d499c67d89..e3c63fe737 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.initializers.pbtxt @@ -48,10 +48,6 @@ tf_module { name: "zeros" mtype: "" } - member_method { - name: "global_variables" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "he_normal" argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -68,12 +64,4 @@ tf_module { name: "lecun_uniform" argspec: "args=[\'seed\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "local_variables" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "variables" - argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], " - } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt index 96212f5528..d2dc8bc85f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.pbtxt @@ -1,9 +1,5 @@ path: "tensorflow" tf_module { - member { - name: "AUTO_REUSE" - mtype: "" - } member { name: "AggregationMethod" mtype: "" @@ -232,18 +228,10 @@ tf_module { name: "VarLenFeature" mtype: "" } - member { - name: "Variable" - mtype: "" - } member { name: "VariableAggregation" mtype: "" } - member { - name: "VariableScope" - mtype: "" - } member { name: "VariableSynchronization" mtype: "" @@ -552,10 +540,6 @@ tf_module { name: "user_ops" mtype: "" } - member { - name: "variable_scope" - mtype: "" - } member { name: "variance_scaling_initializer" mtype: "" @@ -616,10 +600,6 @@ tf_module { name: "add_to_collections" argspec: "args=[\'names\', \'value\'], varargs=None, keywords=None, defaults=None" } - member_method { - name: "all_variables" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "angle" argspec: "args=[\'input\', \'name\'], 
varargs=None, keywords=None, defaults=[\'None\'], " @@ -732,10 +712,6 @@ tf_module { name: "assert_type" argspec: "args=[\'tensor\', \'tf_type\', \'message\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } - member_method { - name: "assert_variables_initialized" - argspec: "args=[\'var_list\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "atan" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -1136,10 +1112,6 @@ tf_module { name: "get_default_session" argspec: "args=[], varargs=None, keywords=None, defaults=None" } - member_method { - name: "get_local_variable" - argspec: "args=[\'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'False\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " - } member_method { name: "get_seed" argspec: "args=[\'op_seed\'], varargs=None, keywords=None, defaults=None" @@ -1152,26 +1124,10 @@ tf_module { name: "get_session_tensor" argspec: "args=[\'handle\', \'dtype\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "get_variable" - argspec: "args=[\'name\', \'shape\', \'dtype\', \'initializer\', \'regularizer\', \'trainable\', \'collections\', \'caching_device\', \'partitioner\', \'validate_shape\', \'use_resource\', \'custom_getter\', \'constraint\', \'synchronization\', \'aggregation\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\', \'None\', \'None\', \'None\', \'VariableSynchronization.AUTO\', \'VariableAggregation.NONE\'], " - } - member_method { - 
name: "get_variable_scope" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "global_norm" argspec: "args=[\'t_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "global_variables" - argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "global_variables_initializer" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "gradients" argspec: "args=[\'ys\', \'xs\', \'grad_ys\', \'name\', \'colocate_gradients_with_ops\', \'gate_gradients\', \'aggregation_method\', \'stop_gradients\'], varargs=None, keywords=None, defaults=[\'None\', \'gradients\', \'False\', \'False\', \'None\', \'None\'], " @@ -1248,18 +1204,6 @@ tf_module { name: "initialize_all_tables" argspec: "args=[\'name\'], varargs=None, keywords=None, defaults=[\'init_all_tables\'], " } - member_method { - name: "initialize_all_variables" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "initialize_local_variables" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } - member_method { - name: "initialize_variables" - argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], " - } member_method { name: "invert_permutation" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -1288,10 +1232,6 @@ tf_module { name: "is_strictly_increasing" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "is_variable_initialized" - argspec: "args=[\'variable\'], varargs=None, keywords=None, defaults=None" - } member_method { name: "lbeta" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -1328,14 +1268,6 @@ tf_module { name: "load_op_library" argspec: "args=[\'library_filename\'], varargs=None, keywords=None, 
defaults=None" } - member_method { - name: "local_variables" - argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "local_variables_initializer" - argspec: "args=[], varargs=None, keywords=None, defaults=None" - } member_method { name: "log" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -1448,14 +1380,6 @@ tf_module { name: "mod" argspec: "args=[\'x\', \'y\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "model_variables" - argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } - member_method { - name: "moving_average_variables" - argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "multinomial" argspec: "args=[\'logits\', \'num_samples\', \'seed\', \'name\', \'output_dtype\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " @@ -1656,10 +1580,6 @@ tf_module { name: "register_tensor_conversion_function" argspec: "args=[\'base_type\', \'conversion_func\', \'priority\'], varargs=None, keywords=None, defaults=[\'100\'], " } - member_method { - name: "report_uninitialized_variables" - argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'report_uninitialized_variables\'], " - } member_method { name: "required_space_to_batch_paddings" argspec: "args=[\'input_shape\', \'block_shape\', \'base_paddings\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " @@ -2068,10 +1988,6 @@ tf_module { name: "trace" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } - member_method { - name: "trainable_variables" - argspec: "args=[\'scope\'], varargs=None, keywords=None, defaults=[\'None\'], " - } member_method { name: "transpose" argspec: "args=[\'a\', \'perm\', \'name\', \'conjugate\'], varargs=None, keywords=None, defaults=[\'None\', 
\'transpose\', \'False\'], " @@ -2140,14 +2056,6 @@ tf_module { name: "variable_axis_size_partitioner" argspec: "args=[\'max_shard_bytes\', \'axis\', \'bytes_per_string_element\', \'max_shards\'], varargs=None, keywords=None, defaults=[\'0\', \'16\', \'None\'], " } - member_method { - name: "variable_op_scope" - argspec: "args=[\'values\', \'name_or_scope\', \'default_name\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\'], " - } - member_method { - name: "variables_initializer" - argspec: "args=[\'var_list\', \'name\'], varargs=None, keywords=None, defaults=[\'init\'], " - } member_method { name: "verify_tensor_all_finite" argspec: "args=[\'t\', \'msg\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt deleted file mode 100644 index e62dec93e6..0000000000 --- a/tensorflow/tools/api/golden/v2/tensorflow.variable_scope.pbtxt +++ /dev/null @@ -1,9 +0,0 @@ -path: "tensorflow.variable_scope" -tf_class { - is_instance: "" - is_instance: "" - member_method { - name: "__init__" - argspec: "args=[\'self\', \'name_or_scope\', \'default_name\', \'values\', \'initializer\', \'regularizer\', \'caching_device\', \'partitioner\', \'custom_getter\', \'reuse\', \'dtype\', \'use_resource\', \'constraint\', \'auxiliary_name_scope\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'None\', \'True\'], " - } -} -- GitLab From f172c52ac74ae6db228119b90785add81648372e Mon Sep 17 00:00:00 2001 From: avijit-nervana Date: Thu, 27 Sep 2018 12:57:24 -0700 Subject: [PATCH 093/570] Fixed the broken unit tests --- 
configure.py | 2 +- tensorflow/workspace.bzl | 16 ++++++++-------- third_party/mkl/build_defs.bzl | 2 +- third_party/ngraph/ngraph.BUILD | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/configure.py b/configure.py index cc6a654a61..f71caa1994 100644 --- a/configure.py +++ b/configure.py @@ -1631,7 +1631,7 @@ def main(): config_info_line('monolithic', 'Config for mostly static monolithic build.') config_info_line('gdr', 'Build with GDR support.') config_info_line('verbs', 'Build with libverbs support.') - config_info_line('ngraph', 'Build with Intel ngraph support.') + config_info_line('ngraph', 'Build with Intel nGraph support.') if __name__ == '__main__': diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e5a0a0b2b7..6966783efd 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -862,11 +862,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "ngraph", urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz", - "https://github.com/NervanaSystems/ngraph/archive/v0.8.0.tar.gz", + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz", + "https://github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz", ], - sha256 = "a8cf3ef2d0e6d31b54eb33f6a9e795f562195ce5c2a857e729ca9c35241cc45c", - strip_prefix = "ngraph-0.8.0", + sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62", + strip_prefix = "ngraph-0.8.1", build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), ) @@ -884,11 +884,11 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "ngraph_tf", urls = [ - "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz", - "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.0.tar.gz", + "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz", + 
"https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz", ], - sha256 = "1f49391c02bef24872e9f85591e60e0e7eef12a337db71390444118049fe451f", - strip_prefix = "ngraph-tf-0.6.0", + sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee", + strip_prefix = "ngraph-tf-0.6.1", build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), ) diff --git a/third_party/mkl/build_defs.bzl b/third_party/mkl/build_defs.bzl index bb798e715a..10c2d90c84 100644 --- a/third_party/mkl/build_defs.bzl +++ b/third_party/mkl/build_defs.bzl @@ -92,7 +92,7 @@ def if_enable_mkl(if_true, if_false = []): A select evaluating to either if_true or if_false as appropriate. """ return select({ - "//third_party/mkl:enable_mkl": if_true, + str(Label("//third_party/mkl:enable_mkl")): if_true, "//conditions:default": if_false, }) diff --git a/third_party/ngraph/ngraph.BUILD b/third_party/ngraph/ngraph.BUILD index 71b2187011..6602a480af 100644 --- a/third_party/ngraph/ngraph.BUILD +++ b/third_party/ngraph/ngraph.BUILD @@ -110,7 +110,7 @@ cc_library( "-I external/ngraph/src", "-I external/nlohmann_json_lib/include/", '-D SHARED_LIB_EXT=\\".so\\"', - '-D NGRAPH_VERSION=\\"0.8.0\\"', + '-D NGRAPH_VERSION=\\"0.8.1\\"', "-D NGRAPH_DEX_ONLY", ], visibility = ["//visibility:public"], @@ -144,7 +144,7 @@ cc_library( "-I external/ngraph/src", "-I external/nlohmann_json_lib/include/", '-D SHARED_LIB_EXT=\\".so\\"', - '-D NGRAPH_VERSION=\\"0.8.0\\"', + '-D NGRAPH_VERSION=\\"0.8.1\\"', ], visibility = ["//visibility:public"], alwayslink = 1, -- GitLab From 5220e565b7cc32a5f757896c76c7d57c33bcd323 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 27 Sep 2018 14:01:27 -0700 Subject: [PATCH 094/570] Don't use tensorflow::Edge after freeing it Even with this bug we were accidentally doing the right thing (so the test case doesn't actually fail without the fix): deleting an Edge sets its input and output indices to kControlSlot-1 so we'd normally expect to fail when there is a control 
edge out of the TF cluster (because a control edge would be recognized as a data edge). But AddEdge(x, -1, y, -1) seems to do the right thing for both control and data edges. PiperOrigin-RevId: 214831204 --- tensorflow/compiler/jit/BUILD | 2 + tensorflow/compiler/jit/build_xla_ops_pass.cc | 11 +- .../compiler/jit/build_xla_ops_pass_test.cc | 112 ++++++++++++++++++ 3 files changed, 116 insertions(+), 9 deletions(-) create mode 100644 tensorflow/compiler/jit/build_xla_ops_pass_test.cc diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 4e184729ef..5bf4af1014 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -478,6 +478,7 @@ tf_cc_test( name = "compilation_passes_test", size = "small", srcs = [ + "build_xla_ops_pass_test.cc", "encapsulate_subgraphs_pass_test.cc", "encapsulate_xla_computations_pass_test.cc", "mark_for_compilation_pass_test.cc", @@ -486,6 +487,7 @@ tf_cc_test( deps = [ ":common", ":compilation_passes", + ":node_matchers", ":xla_cluster_util", ":xla_gpu_device", "//tensorflow/cc:cc_ops", diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc index 13a518d0e8..9e3fd93cda 100644 --- a/tensorflow/compiler/jit/build_xla_ops_pass.cc +++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc @@ -112,16 +112,9 @@ static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) { std::vector out_edges(old_node->out_edges().begin(), old_node->out_edges().end()); for (const Edge* edge : out_edges) { - Node* dst = edge->dst(); - int src_output = edge->src_output(); - int dst_input = edge->dst_input(); + // TODO(sanjoy): This does not update NodeDef inputs. 
+ g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input()); g->RemoveEdge(edge); - - if (edge->IsControlEdge()) { - g->AddControlEdge(new_node, dst); - } else { - g->AddEdge(new_node, src_output, dst, dst_input); - } } } diff --git a/tensorflow/compiler/jit/build_xla_ops_pass_test.cc b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc new file mode 100644 index 0000000000..b7cb4506b9 --- /dev/null +++ b/tensorflow/compiler/jit/build_xla_ops_pass_test.cc @@ -0,0 +1,112 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/jit/build_xla_ops_pass.h" + +#include "tensorflow/cc/framework/ops.h" +#include "tensorflow/cc/ops/array_ops.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/jit/encapsulate_subgraphs_pass.h" +#include "tensorflow/compiler/jit/node_matchers.h" +#include "tensorflow/core/graph/algorithm.h" +#include "tensorflow/core/grappler/optimizers/data/graph_utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +using ::tensorflow::testing::FindNodeByName; +using ::tensorflow::testing::matchers::CtrlDeps; +using ::tensorflow::testing::matchers::NodeWith; +using ::tensorflow::testing::matchers::Op; + +Status BuildXlaOps(const Scope& s, std::unique_ptr* result) { + auto graph = absl::make_unique(OpRegistry::Global()); + TF_RETURN_IF_ERROR(s.ToGraph(graph.get())); + + // Assign all nodes to the CPU device. 
+ static const char* kCpuDevice = "/job:localhost/replica:0/task:0/cpu:0"; + for (Node* n : graph->nodes()) { + if (n->assigned_device_name().empty()) { + n->set_assigned_device_name(kCpuDevice); + } + } + + GraphOptimizationPassOptions opt_options; + opt_options.graph = &graph; + BuildXlaOpsPass pass; + TF_RETURN_IF_ERROR(pass.Run(opt_options)); + *result = std::move(graph); + return Status::OK(); +} + +Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name, + const string& node_name, Node** result) { + NodeDef call_node; + call_node.set_name(node_name); + call_node.set_op(callee_name); + AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node); + AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node); + AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node); + Status s; + *result = graph->AddNode(call_node, &s); + return s; +} + +Node* MakeWrite(const Scope& scope, const string& id) { + Output var_handle = + ops::VarHandleOp(scope.WithOpName("Var" + id), DT_FLOAT, TensorShape({})); + Output value_to_write = + ops::Const(scope.WithOpName("ValueToAssign" + id), 1.0f); + ops::AssignVariableOp assign_op(scope.WithOpName("Assignee" + id), var_handle, + value_to_write); + return assign_op.operation.node(); +} + +FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) { + FunctionDefLibrary flib_def; + FunctionDef func = FunctionDefHelper::Create( + /*function_name=*/name, /*in_def=*/{}, /*out_def=*/{"out: float"}, + /*attr_def*/ + {}, /*node_def=*/{FunctionDefHelper::Const("one", 1.0f)}, + /*ret_def=*/{{"out", "out:output:0"}}); + *flib_def.add_function() = std::move(func); + return flib_def; +} + +TEST(BuildXlaOps, ControlDepsPreserved) { + Scope root = Scope::NewRootScope().ExitOnError(); + + FunctionDefLibrary flib_def = + CreateFunctionDefLibWithConstFunction("cluster_0"); + TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def)); + Node* call; + TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), "cluster_0", "C", &call)); + Node* write_op 
= MakeWrite(root, "write"); + root.graph()->AddControlEdge(call, write_op); + + std::unique_ptr graph; + TF_ASSERT_OK(BuildXlaOps(root, &graph)); + + Node* write_op_new = FindNodeByName(graph.get(), write_op->name()); + ASSERT_NE(write_op_new, nullptr); + EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op("_XlaRun"))))); +} + +} // namespace +} // namespace tensorflow -- GitLab From 2fb9377a5ec610b8eff853fd1d2d53eabf711eda Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 27 Sep 2018 14:03:52 -0700 Subject: [PATCH 095/570] Enable worker heartbeat polling for all available workers. PiperOrigin-RevId: 214831772 --- .../contrib/tpu/python/tpu/session_support.py | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py index 3e91e2df32..24b9bd136b 100644 --- a/tensorflow/contrib/tpu/python/tpu/session_support.py +++ b/tensorflow/contrib/tpu/python/tpu/session_support.py @@ -41,6 +41,25 @@ class CoordinatorShutdownException(Exception): pass +def _make_heartbeat_op(session, device, request_ph): + """Return a heartbeat op or None if heartbeats are not supported by device.""" + try: + with ops.device(device): + heartbeat_op = tpu_ops.worker_heartbeat(request_ph) + request = event_pb2.WorkerHeartbeatRequest() + options = config_pb2.RunOptions(timeout_in_ms=5000) + session.run( + heartbeat_op, + feed_dict={request_ph: request.SerializeToString()}, + options=options) + return heartbeat_op + except errors.InvalidArgumentError as _: + return None + except errors.DeadlineExceededError as _: + logging.warning('Timeout connecting to %s when testing heartbeat', device) + return None + + class WorkerHeartbeatManager(object): """Manages the status/heartbeat monitor for a set of workers.""" @@ -72,30 +91,27 @@ class WorkerHeartbeatManager(object): name='worker_heartbeat_request', dtype=dtypes.string) heartbeat_ops = [] + kept_devices = [] 
for device in devices: - with ops.device(device): - heartbeat_ops.append(tpu_ops.worker_heartbeat(request_placeholder)) + heartbeat_op = _make_heartbeat_op(session, device, request_placeholder) + if heartbeat_op is not None: + kept_devices.append(device) + heartbeat_ops.append(heartbeat_op) + else: + logging.warning('Heartbeat support not available for %s', device) - return WorkerHeartbeatManager(session, devices, heartbeat_ops, + return WorkerHeartbeatManager(session, kept_devices, heartbeat_ops, request_placeholder) - def heartbeat_supported(self): - """Returns True if heartbeat operations are supported on all workers.""" - try: - # Send ping to verify worker has heartbeat support. - self.ping() - return True - except errors.InvalidArgumentError as _: - return False + def num_workers(self): + return len(self._devices) def configure(self, message): """Configure heartbeat manager for all devices. Args: message: `event_pb2.WorkerHeartbeatRequest` - Returns: `None` - """ logging.info('Configuring worker heartbeat: %s', text_format.MessageToString(message)) @@ -184,7 +200,6 @@ class WatchdogManager(threading.Thread): """Initialize a watchdog manager. Args: - session: Session connected to worker devices. A cloned session and graph will be created for managing worker pings. devices: Set of devices to monitor. If none, all workers will be @@ -277,16 +292,14 @@ class GracefulShutdownHook(session_run_hook.SessionRunHook): target=training_session.sess_str, graph=self._graph) self._workers = WorkerHeartbeatManager.from_devices( self._session, all_worker_devices(self._session)) - self._heartbeat_supported = self._workers.heartbeat_supported() + self._heartbeat_supported = self._workers.num_workers() > 0 if self._heartbeat_supported: self._workers.configure( event_pb2.WorkerHeartbeatRequest( shutdown_mode=event_pb2.WAIT_FOR_COORDINATOR)) else: logging.warn( - 'Worker heartbeats not supported by all workers. No failure ' - 'handling will be enabled.' 
- ) + 'No workers support hearbeats. Failure handling will be disabled.') def saver(self): if self._saver: @@ -303,8 +316,7 @@ class GracefulShutdownHook(session_run_hook.SessionRunHook): logging.error( 'Multiple savers in the SAVERS collection. On-demand checkpointing ' 'will be disabled. Pass an explicit `saver` to the constructor to ' - 'override this behavior.' - ) + 'override this behavior.') return None return savers[0] -- GitLab From cc83067469bc30bba55932c587f31ef68f15792f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 14:04:06 -0700 Subject: [PATCH 096/570] Migrate a few conv kernels to use new kernel signatures. PiperOrigin-RevId: 214831837 --- tensorflow/contrib/lite/kernels/conv.cc | 70 +++++++++---------- .../kernels/internal/optimized/cblas_conv.h | 54 ++++++++------ .../internal/optimized/multithreaded_conv.h | 60 +++++++++------- 3 files changed, 100 insertions(+), 84 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index 101b4fc961..dbcadbee14 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -86,6 +86,18 @@ struct OpData { bool run_multithreaded_kernel; }; +inline PaddingType RuntimePaddingType(TfLitePadding padding) { + switch (padding) { + case TfLitePadding::kTfLitePaddingSame: + return PaddingType::kSame; + case TfLitePadding::kTfLitePaddingValid: + return PaddingType::kValid; + case TfLitePadding::kTfLitePaddingUnknown: + default: + return PaddingType::kNone; + } +} + void* Init(TfLiteContext* context, const char* buffer, size_t length) { // This is a builtin op, so we don't use the contents in 'buffer', if any. 
// Instead, we allocate a new object to use as scratch space for im2col, and @@ -487,18 +499,18 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, } else { effective_kernel_type = kernel_type; } + ConvParams op_params; + op_params.padding_type = RuntimePaddingType(params->padding); + op_params.padding_values.width = data->padding.width; + op_params.padding_values.height = data->padding.height; + op_params.stride_width = params->stride_width; + op_params.stride_height = params->stride_height; + op_params.dilation_width_factor = params->dilation_width_factor; + op_params.dilation_height_factor = params->dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; switch (effective_kernel_type) { case kReference: { - ConvParams op_params; - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; - op_params.stride_width = params->stride_width; - op_params.stride_height = params->stride_height; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.dilation_height_factor = params->dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; reference_ops::Conv(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), @@ -508,16 +520,6 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, break; } case kGenericOptimized: { - ConvParams op_params; - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = data->padding.width; - op_params.padding_values.height = data->padding.height; - op_params.stride_width = params->stride_width; - op_params.stride_height = params->stride_height; - op_params.dilation_width_factor = params->dilation_width_factor; - op_params.dilation_height_factor = 
params->dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; optimized_ops::Conv(op_params, GetTensorShape(input), GetTensorData(input), GetTensorShape(filter), GetTensorData(filter), GetTensorShape(bias), @@ -534,25 +536,21 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node, filter_data = GetTensorData(filter); } multithreaded_ops::Conv( - *eigen_support::GetThreadPoolDevice(context), - GetTensorData(input), GetTensorDims(input), filter_data, - GetTensorDims(filter), GetTensorData(bias), - GetTensorDims(bias), params->stride_width, params->stride_height, - data->padding.width, data->padding.height, params->padding, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + *eigen_support::GetThreadPoolDevice(context), op_params, + GetTensorShape(input), GetTensorData(input), + GetTensorShape(filter), filter_data, GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output), GetTensorShape(im2col), + GetTensorData(im2col)); break; } case kCblasOptimized: { - cblas_ops::Conv(GetTensorData(input), GetTensorDims(input), - GetTensorData(filter), GetTensorDims(filter), - GetTensorData(bias), GetTensorDims(bias), - params->stride_width, params->stride_height, - data->padding.width, data->padding.height, - output_activation_min, output_activation_max, - GetTensorData(output), GetTensorDims(output), - GetTensorData(im2col), GetTensorDims(im2col)); + cblas_ops::Conv(op_params, GetTensorShape(input), + GetTensorData(input), GetTensorShape(filter), + GetTensorData(filter), GetTensorShape(bias), + GetTensorData(bias), GetTensorShape(output), + GetTensorData(output), GetTensorShape(im2col), + GetTensorData(im2col)); break; } } diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h 
index 40d42bbae9..2d96da65c3 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h @@ -31,20 +31,29 @@ limitations under the License. namespace tflite { namespace cblas_ops { -inline void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { +inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const int dilation_width_factor = params.dilation_width_factor; + const int dilation_height_factor = params.dilation_height_factor; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); gemmlowp::ScopedProfilingLabel label("Conv/cblas"); const float* gemm_input_data = nullptr; - const Dims<4>* gemm_input_dims = nullptr; - const int filter_width = ArraySize(filter_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); + const RuntimeShape* gemm_input_shape = nullptr; + const int filter_width = filter_shape.Dims(2); 
+ const int filter_height = filter_shape.Dims(1); const bool need_im2col = stride_width != 1 || stride_height != 1 || filter_width != 1 || filter_height != 1; if (need_im2col) { @@ -55,18 +64,17 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, op_params.padding_values.height = pad_height; op_params.stride_width = stride_width; op_params.stride_height = stride_height; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; optimized_ops::Im2col(op_params, filter_height, filter_width, 0, - DimsToShape(input_dims), input_data, - DimsToShape(im2col_dims), im2col_data); + input_shape, input_data, im2col_shape, im2col_data); gemm_input_data = im2col_data; - gemm_input_dims = &im2col_dims; + gemm_input_shape = &im2col_shape; } else { TFLITE_DCHECK(!im2col_data); gemm_input_data = input_data; - gemm_input_dims = &input_dims; + gemm_input_shape = &input_shape; } // The following code computes matrix multiplication c = a * transponse(b) @@ -78,10 +86,10 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, const float* a = gemm_input_data; const float* b = filter_data; float* c = output_data; - int m = gemm_input_dims->sizes[1] * gemm_input_dims->sizes[2] * - gemm_input_dims->sizes[3]; - int n = output_dims.sizes[0]; - int k = gemm_input_dims->sizes[0]; + const int gemm_input_dims = gemm_input_shape->DimensionsCount(); + int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1); + int n = output_shape.Dims(3); + int k = gemm_input_shape->Dims(gemm_input_dims - 1); // The stride of matrix a, b and c respectively. 
int stride_a = k; int stride_b = k; @@ -91,8 +99,8 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims, stride_a, b, stride_b, 0.0f, c, stride_c); optimized_ops::AddBiasAndEvalActivationFunction( - output_activation_min, output_activation_max, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); + output_activation_min, output_activation_max, bias_shape, bias_data, + output_shape, output_data); } } // namespace cblas_ops diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h index b5d001cc9e..4139cf4eba 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h @@ -69,13 +69,13 @@ struct MatMulConvFunctor { template class EigenTensorConvFunctor { private: - Eigen::PaddingType TfLitePadding2EigenPadding(TfLitePadding padding) { + Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) { switch (padding) { - case kTfLitePaddingValid: + case PaddingType::kValid: return Eigen::PADDING_VALID; - case kTfLitePaddingSame: + case PaddingType::kSame: return Eigen::PADDING_SAME; - case kTfLitePaddingUnknown: + case PaddingType::kNone: assert(false); // should never get here. 
return Eigen::PADDING_VALID; } @@ -89,7 +89,7 @@ class EigenTensorConvFunctor { int input_width, int input_depth, const T* filter_data, int filter_height, int filter_width, int filter_count, int stride_rows, int stride_cols, int pad_width, - int pad_height, TfLitePadding padding, T* output_data, + int pad_height, PaddingType padding, T* output_data, int output_height, int output_width) { const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 && stride_rows == 1 && stride_cols == 1); @@ -127,28 +127,38 @@ class EigenTensorConvFunctor { input_depth, filter_count); output.device(device) = Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows, - TfLitePadding2EigenPadding(padding)); + RuntimePadding2EigenPadding(padding)); } } }; -inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data, - const Dims<4>& input_dims, const float* filter_data, - const Dims<4>& filter_dims, const float* bias_data, - const Dims<4>& bias_dims, int stride_width, int stride_height, - int pad_width, int pad_height, TfLitePadding padding, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - float* im2col_data, const Dims<4>& im2col_dims) { - const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); - const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0); - const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0); - const int input_height = ArraySize(input_dims, 2); - const int input_width = ArraySize(input_dims, 1); - const int filter_height = ArraySize(filter_dims, 2); - const int filter_width = ArraySize(filter_dims, 1); - const int output_height = ArraySize(output_dims, 2); - const int output_width = ArraySize(output_dims, 1); +inline void Conv(const Eigen::ThreadPoolDevice& device, + const ConvParams& params, const RuntimeShape& input_shape, + const float* input_data, const RuntimeShape& filter_shape, + const float* filter_data, const 
RuntimeShape& bias_shape, + const float* bias_data, const RuntimeShape& output_shape, + float* output_data, const RuntimeShape& im2col_shape, + float* im2col_data) { + const int stride_width = params.stride_width; + const int stride_height = params.stride_height; + const PaddingType padding = params.padding_type; + const int pad_width = params.padding_values.width; + const int pad_height = params.padding_values.height; + const float output_activation_min = params.float_activation_min; + const float output_activation_max = params.float_activation_max; + TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4); + TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4); + + const int batches = MatchingDim(input_shape, 0, output_shape, 0); + const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3); + const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); EigenTensorConvFunctor conv_functor; conv_functor(device, input_data, im2col_data, batches, input_height, input_width, input_depth, filter_data, filter_height, @@ -157,8 +167,8 @@ inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data, output_width); optimized_ops::AddBiasAndEvalActivationFunction( - output_activation_min, output_activation_max, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); + output_activation_min, output_activation_max, bias_shape, bias_data, + output_shape, output_data); } } // namespace multithreaded_ops -- GitLab From d0397c3314600da0c9cdc300ae87483331d54298 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 27 Sep 2018 14:25:18 -0700 Subject: [PATCH 097/570] Rename TFLite Eager 
delegate -> Flex delegate PiperOrigin-RevId: 214835588 --- .../lite/delegates/{eager => flex}/BUILD | 0 .../delegates/{eager => flex}/buffer_map.cc | 8 ++-- .../delegates/{eager => flex}/buffer_map.h | 12 ++--- .../{eager => flex}/buffer_map_test.cc | 6 +-- .../delegates/{eager => flex}/delegate.cc | 34 +++++++------- .../lite/delegates/{eager => flex}/delegate.h | 26 +++++----- .../{eager => flex}/delegate_data.cc | 6 +-- .../delegates/{eager => flex}/delegate_data.h | 16 +++---- .../{eager => flex}/delegate_data_test.cc | 6 +-- .../{eager => flex}/delegate_test.cc | 14 +++--- .../lite/delegates/{eager => flex}/kernel.cc | 30 ++++++------ .../lite/delegates/{eager => flex}/kernel.h | 12 ++--- .../delegates/{eager => flex}/kernel_test.cc | 16 +++---- .../delegates/{eager => flex}/test_util.cc | 47 +++++++++---------- .../delegates/{eager => flex}/test_util.h | 20 ++++---- .../lite/delegates/{eager => flex}/util.cc | 6 +-- .../lite/delegates/{eager => flex}/util.h | 10 ++-- .../delegates/{eager => flex}/util_test.cc | 6 +-- tensorflow/contrib/lite/kernels/register.cc | 8 ++-- tensorflow/contrib/lite/model.cc | 4 +- tensorflow/contrib/lite/python/convert.py | 6 +-- tensorflow/contrib/lite/python/lite_test.py | 2 +- tensorflow/contrib/lite/testing/BUILD | 2 +- .../contrib/lite/testing/generate_examples.py | 2 +- .../contrib/lite/testing/tflite_diff_flags.h | 4 +- .../contrib/lite/testing/tflite_diff_util.h | 2 +- .../contrib/lite/testing/tflite_driver.cc | 6 +-- .../contrib/lite/testing/tflite_driver.h | 4 +- tensorflow/contrib/lite/toco/args.h | 4 +- .../contrib/lite/toco/import_tensorflow.cc | 4 +- .../contrib/lite/toco/import_tensorflow.h | 2 +- tensorflow/contrib/lite/toco/tflite/export.cc | 20 ++++---- tensorflow/contrib/lite/toco/tflite/export.h | 4 +- .../contrib/lite/toco/tflite/export_test.cc | 2 +- .../contrib/lite/toco/tflite/operator.cc | 26 +++++----- .../contrib/lite/toco/tflite/operator.h | 6 +-- .../contrib/lite/toco/toco_cmdline_flags.cc | 24 
+++++----- tensorflow/contrib/lite/toco/toco_flags.proto | 16 +++---- tensorflow/contrib/lite/toco/toco_tooling.cc | 8 ++-- tensorflow/contrib/lite/tools/benchmark/BUILD | 8 ++-- .../tools/benchmark/benchmark_tflite_model.cc | 6 +-- .../tools/benchmark/benchmark_tflite_model.h | 4 +- tensorflow/contrib/lite/util.cc | 6 +-- tensorflow/contrib/lite/util.h | 8 ++-- tensorflow/contrib/lite/util_test.cc | 16 +++---- 45 files changed, 239 insertions(+), 240 deletions(-) rename tensorflow/contrib/lite/delegates/{eager => flex}/BUILD (100%) rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.cc (95%) rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map.h (86%) rename tensorflow/contrib/lite/delegates/{eager => flex}/buffer_map_test.cc (98%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.cc (76%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate.h (64%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.cc (94%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data.h (78%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_data_test.cc (93%) rename tensorflow/contrib/lite/delegates/{eager => flex}/delegate_test.cc (95%) rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.cc (91%) rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel.h (79%) rename tensorflow/contrib/lite/delegates/{eager => flex}/kernel_test.cc (94%) rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.cc (76%) rename tensorflow/contrib/lite/delegates/{eager => flex}/test_util.h (90%) rename tensorflow/contrib/lite/delegates/{eager => flex}/util.cc (96%) rename tensorflow/contrib/lite/delegates/{eager => flex}/util.h (89%) rename tensorflow/contrib/lite/delegates/{eager => flex}/util_test.cc (97%) diff --git a/tensorflow/contrib/lite/delegates/eager/BUILD b/tensorflow/contrib/lite/delegates/flex/BUILD similarity index 100% rename from 
tensorflow/contrib/lite/delegates/eager/BUILD rename to tensorflow/contrib/lite/delegates/flex/BUILD diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc similarity index 95% rename from tensorflow/contrib/lite/delegates/eager/buffer_map.cc rename to tensorflow/contrib/lite/delegates/flex/buffer_map.cc index e5a19c3997..63e39196d9 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map.cc +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include "tensorflow/c/c_api_internal.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/log_memory.h" namespace tflite { -namespace eager { +namespace flex { namespace { // A tensor buffer that is allocated, deallocated and populated by TF Lite. 
class TfLiteTensorBuffer : public tensorflow::TensorBuffer { @@ -107,5 +107,5 @@ void BufferMap::SetFromTensorFlow(int tensor_index, tensorflow::Tensor tensor) { id_to_tensor_[tensor_index] = std::move(tensor); } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map.h b/tensorflow/contrib/lite/delegates/flex/buffer_map.h similarity index 86% rename from tensorflow/contrib/lite/delegates/eager/buffer_map.h rename to tensorflow/contrib/lite/delegates/flex/buffer_map.h index aaaa045840..4ce886568a 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map.h +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ #include @@ -21,12 +21,12 @@ limitations under the License. #include "tensorflow/core/framework/tensor.h" namespace tflite { -namespace eager { +namespace flex { // Maps a TF Lite tensor index into a TensorFlow tensor. // // The TF Lite interpreter assigns integer indices to each of its tensors, but -// the Eager delegate deals in terms of TensorFlow tensors. This class maps +// the Flex delegate deals in terms of TensorFlow tensors. This class maps // from indices to tensors and allows the creation of new tensors to be // associated with a given index. 
class BufferMap { @@ -55,7 +55,7 @@ class BufferMap { std::map id_to_tensor_; }; -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_BUFFER_MAP_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_BUFFER_MAP_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc similarity index 98% rename from tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc rename to tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc index a046943e56..bb80e25e80 100644 --- a/tensorflow/contrib/lite/delegates/eager/buffer_map_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/buffer_map_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include #include @@ -21,7 +21,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ElementsAre; @@ -164,7 +164,7 @@ TEST(BufferMapTest, TensorFlowOverwritesTfLite) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.cc b/tensorflow/contrib/lite/delegates/flex/delegate.cc similarity index 76% rename from tensorflow/contrib/lite/delegates/eager/delegate.cc rename to tensorflow/contrib/lite/delegates/flex/delegate.cc index 45fc158157..ba065a8ff5 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate.cc @@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include #include "tensorflow/contrib/lite/context_util.h" -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/contrib/lite/util.h" #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { namespace delegate { TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { @@ -32,7 +32,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { TfLiteIntArray* plan; TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan)); - // Add all 
custom ops starting with "Eager" to list of supported nodes. + // Add all custom ops starting with "Flex" to list of supported nodes. std::vector supported_nodes; for (int node_index : TfLiteIntArrayView(plan)) { TfLiteNode* node; @@ -40,7 +40,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) { TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration( context, node_index, &node, &registration)); - if (IsEagerOp(registration->custom_name)) { + if (IsFlexOp(registration->custom_name)) { supported_nodes.push_back(node_index); } } @@ -81,28 +81,28 @@ TfLiteStatus CopyFromBufferHandle(TfLiteContext* context, } } // namespace delegate -} // namespace eager +} // namespace flex -std::unique_ptr EagerDelegate::Create() { - std::unique_ptr delegate_data; - if (!eager::DelegateData::Create(&delegate_data).ok()) { +std::unique_ptr FlexDelegate::Create() { + std::unique_ptr delegate_data; + if (!flex::DelegateData::Create(&delegate_data).ok()) { fprintf(stderr, "Unable to initialize TensorFlow context.\n"); return nullptr; } - return std::unique_ptr( - new EagerDelegate(std::move(delegate_data))); + return std::unique_ptr( + new FlexDelegate(std::move(delegate_data))); } -EagerDelegate::EagerDelegate(std::unique_ptr delegate_data) +FlexDelegate::FlexDelegate(std::unique_ptr delegate_data) : TfLiteDelegate{ /*data_=*/delegate_data.get(), - /*nullptr,*/ &eager::delegate::Prepare, - /*CopyFromBufferHandle=*/&eager::delegate::CopyFromBufferHandle, + /*nullptr,*/ &flex::delegate::Prepare, + /*CopyFromBufferHandle=*/&flex::delegate::CopyFromBufferHandle, /*CopyToBufferHandle=*/nullptr, /*FreeBufferHandle=*/nullptr}, delegate_data_(std::move(delegate_data)) {} -EagerDelegate::~EagerDelegate() {} +FlexDelegate::~FlexDelegate() {} } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/delegate.h b/tensorflow/contrib/lite/delegates/flex/delegate.h similarity index 64% rename from tensorflow/contrib/lite/delegates/eager/delegate.h rename to
tensorflow/contrib/lite/delegates/flex/delegate.h index 70f3c15af4..1017780dc7 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate.h +++ b/tensorflow/contrib/lite/delegates/flex/delegate.h @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ #include "tensorflow/contrib/lite/c/c_api_internal.h" -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" namespace tflite { @@ -24,12 +24,12 @@ namespace tflite { // Delegate that can be used to extract parts of a graph that are designed to be // executed by TensorFlow's runtime via Eager. // -// The interpreter must be constructed after the EagerDelegate and destructed -// before the EagerDelegate. This delegate may be used with multiple +// The interpreter must be constructed after the FlexDelegate and destructed +// before the FlexDelegate. This delegate may be used with multiple // interpreters, but it is *not* thread-safe. // // Usage: -// auto delegate = EagerDelegate::Create(); +// auto delegate = FlexDelegate::Create(); // ... build interpreter ... // // if (delegate) { @@ -39,21 +39,21 @@ namespace tflite { // ... run inference ... // ... destroy interpreter ... // ... destroy delegate ... -class EagerDelegate : public TfLiteDelegate { +class FlexDelegate : public TfLiteDelegate { public: // Creates a delegate that supports TF ops. // - // If the underyling TF Eager context creation fails, returns null. 
- static std::unique_ptr Create(); + // If the underyling TF Flex context creation fails, returns null. + static std::unique_ptr Create(); - ~EagerDelegate(); + ~FlexDelegate(); private: - explicit EagerDelegate(std::unique_ptr delegate_data); + explicit FlexDelegate(std::unique_ptr delegate_data); - std::unique_ptr delegate_data_; + std::unique_ptr delegate_data_; }; } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc similarity index 94% rename from tensorflow/contrib/lite/delegates/eager/delegate_data.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_data.cc index 0fd5c976f8..8f985f770c 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { tensorflow::Status DelegateData::Create(std::unique_ptr* data) { std::vector devices; @@ -43,5 +43,5 @@ DelegateData::DelegateData(tensorflow::EagerContext* eager_context) DelegateData::~DelegateData() {} -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data.h b/tensorflow/contrib/lite/delegates/flex/delegate_data.h similarity index 78% rename from tensorflow/contrib/lite/delegates/eager/delegate_data.h rename to tensorflow/contrib/lite/delegates/flex/delegate_data.h index 772d26f44e..8d75f0b0ef 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data.h +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data.h @@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ -#include "tensorflow/contrib/lite/delegates/eager/buffer_map.h" +#include "tensorflow/contrib/lite/delegates/flex/buffer_map.h" #include "tensorflow/core/common_runtime/eager/context.h" namespace tflite { -namespace eager { +namespace flex { -// Data kept by the Eager delegate for the lifetime of an Interpreter. 
+// Data kept by the Flex delegate for the lifetime of an Interpreter. class DelegateData { public: // Create a new DelegateData, initialized with a newly-created EagerContext. @@ -29,7 +29,7 @@ class DelegateData { ~DelegateData(); - // The EagerContext that is required for execution of Eager Ops. + // The EagerContext that is required for execution of Flex Ops. tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); } // Map from TF Lite tensor index to TensorFlow tensor for a given context. @@ -46,7 +46,7 @@ class DelegateData { std::unordered_map buffer_map_; }; -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_DELEGATE_DATA_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_DELEGATE_DATA_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc similarity index 93% rename from tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc index def063309f..30b10f435a 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_data_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_data_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" #include #include @@ -20,7 +20,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/testing/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { TEST(DelegateDataTest, Basic) { @@ -39,7 +39,7 @@ TEST(DelegateDataTest, Basic) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc similarity index 95% rename from tensorflow/contrib/lite/delegates/eager/delegate_test.cc rename to tensorflow/contrib/lite/delegates/flex/delegate_test.cc index 43ec5d53b8..1813952cef 100644 --- a/tensorflow/contrib/lite/delegates/eager/delegate_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/delegate_test.cc @@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include #include -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ContainsRegex; using ::testing::ElementsAre; -class DelegateTest : public testing::EagerModelTest { +class DelegateTest : public testing::FlexModelTest { public: DelegateTest() { - delegate_ = EagerDelegate::Create(); + delegate_ = FlexDelegate::Create(); interpreter_.reset(new Interpreter(&error_reporter_)); } @@ -46,7 +46,7 @@ class DelegateTest : public testing::EagerModelTest { } private: - std::unique_ptr delegate_; + std::unique_ptr delegate_; }; TEST_F(DelegateTest, FullGraph) { @@ -236,7 +236,7 @@ TEST_F(DelegateTest, MultipleInterpretersSameDelegate) { } } // namespace -} // namespace eager +} 
// namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.cc b/tensorflow/contrib/lite/delegates/flex/kernel.cc similarity index 91% rename from tensorflow/contrib/lite/delegates/eager/kernel.cc rename to tensorflow/contrib/lite/delegates/flex/kernel.cc index 48a2f56baf..e4f1aea990 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel.cc +++ b/tensorflow/contrib/lite/delegates/flex/kernel.cc @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" #include "flatbuffers/flexbuffers.h" // TF:flatbuffers #include "tensorflow/contrib/lite/builtin_ops.h" #include "tensorflow/contrib/lite/c/c_api_internal.h" #include "tensorflow/contrib/lite/context_util.h" -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/string.h" #include "tensorflow/core/common_runtime/eager/context.h" @@ -28,10 +28,10 @@ limitations under the License. #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_util.h" -// Note: this is part of TF Lite's Eager delegation code which is to be +// Note: this is part of TF Lite's Flex delegation code which is to be // completed soon. -// This is the TF Lite op that is created by the eager delegate to handle +// This is the TF Lite op that is created by the flex delegate to handle // execution of a supported subgraph. 
The usual flow is that the delegate // informs the interpreter of supported nodes in a graph, and each supported // subgraph is replaced with one instance of this kernel. @@ -46,7 +46,7 @@ limitations under the License. // corresponding TensorFlow/Eager Op. namespace tflite { -namespace eager { +namespace flex { namespace kernel { // Controls the lifetime of tensor handles in a vector. @@ -72,11 +72,11 @@ class VectorOfHandles { // Executes the TensorFlow op given by 'op_name', with the attributes specified // in 'nodedef'. Inputs and outputs are given as indices into the 'buffer_map'. -tensorflow::Status ExecuteEagerOp(tensorflow::EagerContext* eager_context, - BufferMap* buffer_map, const string& op_name, - const tensorflow::NodeDef& nodedef, - const std::vector& inputs, - const std::vector& outputs) { +tensorflow::Status ExecuteFlexOp(tensorflow::EagerContext* eager_context, + BufferMap* buffer_map, const string& op_name, + const tensorflow::NodeDef& nodedef, + const std::vector& inputs, + const std::vector& outputs) { const tensorflow::AttrTypeMap* attr_types; TF_RETURN_WITH_CONTEXT_IF_ERROR( tensorflow::AttrTypeMapForOp(op_name.c_str(), &attr_types), @@ -258,13 +258,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // Execute the TensorFlow Ops sequentially. 
for (const auto& node_data : op_data->nodes) { if (node_data.nodedef.op().empty()) { - context->ReportError(context, "Invalid NodeDef in Eager op '%s'", + context->ReportError(context, "Invalid NodeDef in Flex op '%s'", node_data.name.c_str()); return kTfLiteError; } auto status = - ExecuteEagerOp(eager_context, buffer_map, node_data.name, - node_data.nodedef, node_data.inputs, node_data.outputs); + ExecuteFlexOp(eager_context, buffer_map, node_data.name, + node_data.nodedef, node_data.inputs, node_data.outputs); TF_LITE_ENSURE_OK(context, ConvertStatus(context, status)); } @@ -295,5 +295,5 @@ TfLiteRegistration GetKernel() { return registration; } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/kernel.h b/tensorflow/contrib/lite/delegates/flex/kernel.h similarity index 79% rename from tensorflow/contrib/lite/delegates/eager/kernel.h rename to tensorflow/contrib/lite/delegates/flex/kernel.h index 2478abccaa..ac9313a37b 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel.h +++ b/tensorflow/contrib/lite/delegates/flex/kernel.h @@ -12,23 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ #include "tensorflow/contrib/lite/c/c_api_internal.h" namespace tflite { -namespace eager { +namespace flex { // Return the registration object used to initialize and execute ops that will // be delegated to TensorFlow's Eager runtime. This TF Lite op is created by -// the eager delegate to handle execution of a supported subgraph. 
The usual +// the flex delegate to handle execution of a supported subgraph. The usual // flow is that the delegate informs the interpreter of supported nodes in a // graph, and each supported subgraph is replaced with one instance of this // kernel. TfLiteRegistration GetKernel(); -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_KERNEL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_KERNEL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc similarity index 94% rename from tensorflow/contrib/lite/delegates/eager/kernel_test.cc rename to tensorflow/contrib/lite/delegates/flex/kernel_test.cc index 66f2226626..94a6f8b61a 100644 --- a/tensorflow/contrib/lite/delegates/eager/kernel_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/kernel_test.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/kernel.h" +#include "tensorflow/contrib/lite/delegates/flex/kernel.h" #include #include -#include "tensorflow/contrib/lite/delegates/eager/delegate_data.h" -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate_data.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using ::testing::ContainsRegex; @@ -31,12 +31,12 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteDelegate* delegate, TfLiteIntArray* size_and_nodes = ConvertVectorToTfLiteIntArray(supported_nodes); TF_LITE_ENSURE_STATUS(context->ReplaceSubgraphsWithDelegateKernels( - context, eager::GetKernel(), size_and_nodes, delegate)); + context, flex::GetKernel(), size_and_nodes, delegate)); TfLiteIntArrayFree(size_and_nodes); return kTfLiteOk; } -class KernelTest : public testing::EagerModelTest { +class KernelTest : public testing::FlexModelTest { public: KernelTest() { CHECK(DelegateData::Create(&delegate_data_).ok()); @@ -167,7 +167,7 @@ TEST_F(KernelTest, WrongSetOfNodes) { ASSERT_FALSE(Invoke()); ASSERT_THAT(error_reporter().error_messages(), - ContainsRegex("Invalid NodeDef in Eager op")); + ContainsRegex("Invalid NodeDef in Flex op")); } TEST_F(KernelTest, MixedGraph) { @@ -220,7 +220,7 @@ TEST_F(KernelTest, SplitGraph) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.cc b/tensorflow/contrib/lite/delegates/flex/test_util.cc similarity index 76% rename from tensorflow/contrib/lite/delegates/eager/test_util.cc rename to tensorflow/contrib/lite/delegates/flex/test_util.cc index d47be761fb..69c336a01a 100644 --- a/tensorflow/contrib/lite/delegates/eager/test_util.cc +++ 
b/tensorflow/contrib/lite/delegates/flex/test_util.cc @@ -13,25 +13,24 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/test_util.h" +#include "tensorflow/contrib/lite/delegates/flex/test_util.h" #include "absl/memory/memory.h" #include "flatbuffers/flexbuffers.h" // TF:flatbuffers #include "tensorflow/contrib/lite/string.h" namespace tflite { -namespace eager { +namespace flex { namespace testing { -bool EagerModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; } +bool FlexModelTest::Invoke() { return interpreter_->Invoke() == kTfLiteOk; } -void EagerModelTest::SetShape(int tensor_index, - const std::vector& values) { +void FlexModelTest::SetShape(int tensor_index, const std::vector& values) { ASSERT_EQ(interpreter_->ResizeInputTensor(tensor_index, values), kTfLiteOk); ASSERT_EQ(interpreter_->AllocateTensors(), kTfLiteOk); } -std::vector EagerModelTest::GetShape(int tensor_index) { +std::vector FlexModelTest::GetShape(int tensor_index) { std::vector result; auto* dims = interpreter_->tensor(tensor_index)->dims; result.reserve(dims->size); @@ -41,13 +40,13 @@ std::vector EagerModelTest::GetShape(int tensor_index) { return result; } -TfLiteType EagerModelTest::GetType(int tensor_index) { +TfLiteType FlexModelTest::GetType(int tensor_index) { return interpreter_->tensor(tensor_index)->type; } -void EagerModelTest::AddTensors(int num_tensors, const std::vector& inputs, - const std::vector& outputs, - TfLiteType type, const std::vector& dims) { +void FlexModelTest::AddTensors(int num_tensors, const std::vector& inputs, + const std::vector& outputs, TfLiteType type, + const std::vector& dims) { interpreter_->AddTensors(num_tensors); for (int i = 0; i < num_tensors; ++i) { TfLiteQuantizationParams quant; @@ -66,8 +65,8 @@ void EagerModelTest::AddTensors(int num_tensors, const 
std::vector& inputs, CHECK_EQ(interpreter_->SetOutputs(outputs), kTfLiteOk); } -void EagerModelTest::AddTfLiteMulOp(const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfLiteMulOp(const std::vector& inputs, + const std::vector& outputs) { static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; reg.builtin_code = BuiltinOperator_MUL; reg.prepare = [](TfLiteContext* context, TfLiteNode* node) { @@ -90,8 +89,8 @@ void EagerModelTest::AddTfLiteMulOp(const std::vector& inputs, kTfLiteOk); } -void EagerModelTest::AddTfOp(TfOpType op, const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfOp(TfOpType op, const std::vector& inputs, + const std::vector& outputs) { auto attr = [](const string& key, const string& value) { return " attr{ key: '" + key + "' value {" + value + "}}"; }; @@ -107,28 +106,28 @@ void EagerModelTest::AddTfOp(TfOpType op, const std::vector& inputs, if (op == kUnpack) { string attributes = type_attribute + attr("num", "i: 2") + attr("axis", "i: 0"); - AddTfOp("EagerUnpack", "Unpack", attributes, inputs, outputs); + AddTfOp("FlexUnpack", "Unpack", attributes, inputs, outputs); } else if (op == kIdentity) { string attributes = type_attribute; - AddTfOp("EagerIdentity", "Identity", attributes, inputs, outputs); + AddTfOp("FlexIdentity", "Identity", attributes, inputs, outputs); } else if (op == kAdd) { string attributes = type_attribute; - AddTfOp("EagerAdd", "Add", attributes, inputs, outputs); + AddTfOp("FlexAdd", "Add", attributes, inputs, outputs); } else if (op == kMul) { string attributes = type_attribute; - AddTfOp("EagerMul", "Mul", attributes, inputs, outputs); + AddTfOp("FlexMul", "Mul", attributes, inputs, outputs); } else if (op == kNonExistent) { AddTfOp("NonExistentOp", "NonExistentOp", "", inputs, outputs); } else if (op == kIncompatibleNodeDef) { // "Cast" op is created without attributes - making it incompatible. 
- AddTfOp("EagerCast", "Cast", "", inputs, outputs); + AddTfOp("FlexCast", "Cast", "", inputs, outputs); } } -void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name, - const string& nodedef_str, - const std::vector& inputs, - const std::vector& outputs) { +void FlexModelTest::AddTfOp(const char* tflite_name, const string& tf_name, + const string& nodedef_str, + const std::vector& inputs, + const std::vector& outputs) { static TfLiteRegistration reg = {nullptr, nullptr, nullptr, nullptr}; reg.builtin_code = BuiltinOperator_CUSTOM; reg.custom_name = tflite_name; @@ -154,5 +153,5 @@ void EagerModelTest::AddTfOp(const char* tflite_name, const string& tf_name, } } // namespace testing -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/test_util.h b/tensorflow/contrib/lite/delegates/flex/test_util.h similarity index 90% rename from tensorflow/contrib/lite/delegates/eager/test_util.h rename to tensorflow/contrib/lite/delegates/flex/test_util.h index 816db41931..a8c81b90a3 100644 --- a/tensorflow/contrib/lite/delegates/eager/test_util.h +++ b/tensorflow/contrib/lite/delegates/flex/test_util.h @@ -13,14 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ #include "tensorflow/c/c_api_internal.h" #include "tensorflow/contrib/lite/kernels/test_util.h" namespace tflite { -namespace eager { +namespace flex { namespace testing { enum TfOpType { @@ -35,12 +35,12 @@ enum TfOpType { }; // This class creates models with TF and TFLite ops. 
In order to use this class -// to test the Eager delegate, implement a function that calls +// to test the Flex delegate, implement a function that calls // interpreter->ModifyGraphWithDelegate. -class EagerModelTest : public ::testing::Test { +class FlexModelTest : public ::testing::Test { public: - EagerModelTest() {} - ~EagerModelTest() {} + FlexModelTest() {} + ~FlexModelTest() {} bool Invoke(); @@ -104,7 +104,7 @@ class EagerModelTest : public ::testing::Test { private: // Helper method to add a TensorFlow op. tflite_names needs to start with - // "Eager" in order to work with the Eager delegate. + // "Flex" in order to work with the Flex delegate. void AddTfOp(const char* tflite_name, const string& tf_name, const string& nodedef_str, const std::vector& inputs, const std::vector& outputs); @@ -113,7 +113,7 @@ class EagerModelTest : public ::testing::Test { }; } // namespace testing -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_TEST_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_TEST_UTIL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/util.cc b/tensorflow/contrib/lite/delegates/flex/util.cc similarity index 96% rename from tensorflow/contrib/lite/delegates/eager/util.cc rename to tensorflow/contrib/lite/delegates/flex/util.cc index 051246bf86..829bc388bf 100644 --- a/tensorflow/contrib/lite/delegates/eager/util.cc +++ b/tensorflow/contrib/lite/delegates/flex/util.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" namespace tflite { -namespace eager { +namespace flex { TfLiteStatus ConvertStatus(TfLiteContext* context, const tensorflow::Status& status) { @@ -100,5 +100,5 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) { } } -} // namespace eager +} // namespace flex } // namespace tflite diff --git a/tensorflow/contrib/lite/delegates/eager/util.h b/tensorflow/contrib/lite/delegates/flex/util.h similarity index 89% rename from tensorflow/contrib/lite/delegates/eager/util.h rename to tensorflow/contrib/lite/delegates/flex/util.h index 930cb99cb9..7f910e7316 100644 --- a/tensorflow/contrib/lite/delegates/eager/util.h +++ b/tensorflow/contrib/lite/delegates/flex/util.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ -#define TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ #include "tensorflow/c/c_api_internal.h" #include "tensorflow/contrib/lite/c/c_api_internal.h" @@ -21,7 +21,7 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" namespace tflite { -namespace eager { +namespace flex { // Converts a tensorflow:Status into a TfLiteStatus. If the original status // represented an error, reports it using the given 'context'. @@ -41,7 +41,7 @@ TF_DataType GetTensorFlowDataType(TfLiteType type); // Returns the TfLiteType that corresponds to the given TF C API Data type. 
TfLiteType GetTensorFlowLiteType(TF_DataType); -} // namespace eager +} // namespace flex } // namespace tflite -#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_EAGER_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_DELEGATES_FLEX_UTIL_H_ diff --git a/tensorflow/contrib/lite/delegates/eager/util_test.cc b/tensorflow/contrib/lite/delegates/flex/util_test.cc similarity index 97% rename from tensorflow/contrib/lite/delegates/eager/util_test.cc rename to tensorflow/contrib/lite/delegates/flex/util_test.cc index aebc91149c..5f049e7b0a 100644 --- a/tensorflow/contrib/lite/delegates/eager/util_test.cc +++ b/tensorflow/contrib/lite/delegates/flex/util_test.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/contrib/lite/delegates/eager/util.h" +#include "tensorflow/contrib/lite/delegates/flex/util.h" #include @@ -22,7 +22,7 @@ limitations under the License. #include "tensorflow/contrib/lite/testing/util.h" namespace tflite { -namespace eager { +namespace flex { namespace { using tensorflow::DT_FLOAT; @@ -132,7 +132,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) { } } // namespace -} // namespace eager +} // namespace flex } // namespace tflite int main(int argc, char** argv) { diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 2f4b663a28..9402105fa7 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -125,7 +125,7 @@ TfLiteStatus UnsupportedTensorFlowOp(TfLiteContext* context, TfLiteNode* node) { context->ReportError( context, "Regular TensorFlow ops are not supported by this interpreter. 
Make sure " - "you invoke the Eager delegate before inference."); + "you invoke the Flex delegate before inference."); return kTfLiteError; } @@ -136,13 +136,13 @@ const TfLiteRegistration* BuiltinOpResolver::FindOp(tflite::BuiltinOperator op, const TfLiteRegistration* BuiltinOpResolver::FindOp(const char* op, int version) const { - // Return the NULL Op for all ops whose name start with "Eager", allowing + // Return the NULL Op for all ops whose name start with "Flex", allowing // the interpreter to delegate their execution. - if (IsEagerOp(op)) { + if (IsFlexOp(op)) { static TfLiteRegistration null_op{ nullptr, nullptr, &UnsupportedTensorFlowOp, nullptr, nullptr, BuiltinOperator_CUSTOM, - "Eager", 1}; + "Flex", 1}; return &null_op; } return MutableOpResolver::FindOp(op, version); diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index ea2817beec..eff6181a61 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -28,7 +28,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/nnapi_delegate.h" #endif #if defined(TFLITE_EXTENDED) -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #endif #include "tensorflow/contrib/lite/version.h" @@ -451,7 +451,7 @@ TfLiteStatus InterpreterBuilder::operator()( (**interpreter).SetVariables(std::move(variables)); #if defined(TFLITE_EXTENDED) - if (auto delegate = EagerDelegate::Create()) { + if (auto delegate = FlexDelegate::Create()) { (**interpreter) .ModifyGraphWithDelegate(std::move(delegate), /*allow_dynamic_tensors=*/true); diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py index 627be8f44f..73a420c47b 100644 --- a/tensorflow/contrib/lite/python/convert.py +++ b/tensorflow/contrib/lite/python/convert.py @@ -241,10 +241,10 @@ def build_toco_convert_protos(input_tensors, toco.dump_graphviz_dir = dump_graphviz_dir toco.dump_graphviz_include_video = dump_graphviz_video if converter_mode == ConverterMode.TOCO_EXTENDED: - toco.allow_eager_ops = True + toco.allow_flex_ops = True elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL: - toco.allow_eager_ops = True - toco.force_eager_ops = True + toco.allow_flex_ops = True + toco.force_flex_ops = True model = _model_flags_pb2.ModelFlags() model.change_concat_input_ranges = change_concat_input_ranges diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index 33f8fc1e8c..7b0df01d1d 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -432,7 +432,7 @@ class FromSessionTest(test_util.TensorFlowTestCase): interpreter.allocate_tensors() self.assertIn( 'Regular TensorFlow ops are not supported by this interpreter. 
Make ' - 'sure you invoke the Eager delegate before inference.', + 'sure you invoke the Flex delegate before inference.', str(error.exception)) def testFloatTocoConverter(self): diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 55ef1172b2..f0bfec2338 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -164,7 +164,7 @@ cc_library( ":test_runner", "//tensorflow/contrib/lite:builtin_op_data", "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite/delegates/eager:delegate", + "//tensorflow/contrib/lite/delegates/flex:delegate", "//tensorflow/contrib/lite/kernels:builtin_ops", ], ) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 014c80b5ef..53bd88d087 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -343,7 +343,7 @@ def toco_convert(graph_def_str, input_tensors, output_tensors, opts = ("--input_arrays={0} --output_arrays={1}".format( ",".join(input_arrays), ",".join(output_tensors))) elif FLAGS.run_with_extended: - opts += " --allow_eager_ops --force_eager_ops" + opts += " --allow_flex_ops --force_flex_ops" cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" % (bin_path, graphdef_file.name, output_file.name, opts, stdout_file.name)) diff --git a/tensorflow/contrib/lite/testing/tflite_diff_flags.h b/tensorflow/contrib/lite/testing/tflite_diff_flags.h index 3874bc31d7..ad889a2f19 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_flags.h +++ b/tensorflow/contrib/lite/testing/tflite_diff_flags.h @@ -57,7 +57,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { "[optional] Number of full runs in each pass."), tensorflow::Flag("delegate", &values.delegate, "[optional] Delegate to use for executing ops. 
Must be " - "`{\"\", EAGER}`"), + "`{\"\", FLEX}`"), }; bool no_inputs = *argc == 1; @@ -70,7 +70,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) { values.input_layer_shape.empty() || values.output_layer.empty()) { fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str()); return {}; - } else if (!(values.delegate == "" || values.delegate == "EAGER")) { + } else if (!(values.delegate == "" || values.delegate == "FLEX")) { fprintf(stderr, "%s", tensorflow::Flags::Usage(argv[0], flags).c_str()); return {}; } diff --git a/tensorflow/contrib/lite/testing/tflite_diff_util.h b/tensorflow/contrib/lite/testing/tflite_diff_util.h index f67992139f..28b14bd143 100644 --- a/tensorflow/contrib/lite/testing/tflite_diff_util.h +++ b/tensorflow/contrib/lite/testing/tflite_diff_util.h @@ -45,7 +45,7 @@ struct DiffOptions { // second pass does multiple inferences back to back. int num_runs_per_pass; // Path to the delegate library to be loaded in order to execute ops. Must be - // `{"", EAGER}`. + // `{"", FLEX}`. string delegate; }; diff --git a/tensorflow/contrib/lite/testing/tflite_driver.cc b/tensorflow/contrib/lite/testing/tflite_driver.cc index 17aa8cb293..ef49e6f8bc 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.cc +++ b/tensorflow/contrib/lite/testing/tflite_driver.cc @@ -17,7 +17,7 @@ limitations under the License. 
#include #include "tensorflow/contrib/lite/builtin_op_data.h" -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include "tensorflow/contrib/lite/testing/split.h" namespace tflite { @@ -138,8 +138,8 @@ class TfLiteDriver::Expectation { TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name) : use_nnapi_(use_nnapi) { - if (delegate_name == "EAGER") { - delegate_ = EagerDelegate::Create(); + if (delegate_name == "FLEX") { + delegate_ = FlexDelegate::Create(); } } diff --git a/tensorflow/contrib/lite/testing/tflite_driver.h b/tensorflow/contrib/lite/testing/tflite_driver.h index aed35f877d..dc2a4e5877 100644 --- a/tensorflow/contrib/lite/testing/tflite_driver.h +++ b/tensorflow/contrib/lite/testing/tflite_driver.h @@ -17,7 +17,7 @@ limitations under the License. #include -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #include "tensorflow/contrib/lite/interpreter.h" #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/model.h" @@ -53,7 +53,7 @@ class TfLiteDriver : public TestRunner { class Expectation; - std::unique_ptr delegate_; + std::unique_ptr delegate_; bool use_nnapi_ = false; std::unique_ptr model_; std::unique_ptr interpreter_; diff --git a/tensorflow/contrib/lite/toco/args.h b/tensorflow/contrib/lite/toco/args.h index f14dbc258b..2699ac76e1 100644 --- a/tensorflow/contrib/lite/toco/args.h +++ b/tensorflow/contrib/lite/toco/args.h @@ -248,9 +248,9 @@ struct ParsedTocoFlags { Arg dedupe_array_min_size_bytes = Arg(64); Arg split_tflite_lstm_inputs = Arg(true); // WARNING: Experimental interface, subject to change - Arg allow_eager_ops = Arg(false); + Arg allow_flex_ops = Arg(false); // WARNING: Experimental interface, subject to change - Arg force_eager_ops = Arg(false); + Arg force_flex_ops = Arg(false); }; } // namespace toco diff --git 
a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index e02d000e7e..5eaf6e27fc 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -2123,9 +2123,9 @@ std::unique_ptr ImportTensorFlowGraphDef( Model* model = new Model; internal::ConverterMapType converter_map; - // This is used for the TFLite "Full Eager Mode" conversion. All the ops are + // This is used for the TFLite "Full Flex Mode" conversion. All the ops are // imported as `TensorFlowUnsupportedOperator`, and later all these ops are - // converted to TFLite Eager ops. + // converted to TFLite Flex ops. if (!tf_import_flags.import_all_ops_as_unsupported) { converter_map = internal::GetTensorFlowNodeConverterMap(); } diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.h b/tensorflow/contrib/lite/toco/import_tensorflow.h index 7db23f2d44..c5ff96956a 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.h +++ b/tensorflow/contrib/lite/toco/import_tensorflow.h @@ -30,7 +30,7 @@ struct TensorFlowImportFlags { // Do not recognize any op and import all ops as // `TensorFlowUnsupportedOperator`. This is used to populated with the - // `force_eager_ops` flag. + // `force_flex_ops` flag. 
bool import_all_ops_as_unsupported = false; }; diff --git a/tensorflow/contrib/lite/toco/tflite/export.cc b/tensorflow/contrib/lite/toco/tflite/export.cc index 9f60942f47..0c9fac249c 100644 --- a/tensorflow/contrib/lite/toco/tflite/export.cc +++ b/tensorflow/contrib/lite/toco/tflite/export.cc @@ -50,16 +50,16 @@ namespace { details::OperatorKey GetOperatorKey( const ::toco::Operator& op, const std::map>& ops_by_type, - bool allow_eager_ops) { + bool allow_flex_ops) { string custom_code; if (op.type == OperatorType::kUnsupported) { const TensorFlowUnsupportedOperator& unsupported_op = static_cast(op); - // TODO(b/113715895): When `allow_eager_ops` is on, for now there's no way + // TODO(b/113715895): When `allow_flex_ops` is on, for now there's no way // to populate a regular custom op. We need to find a way to fix this. - if (allow_eager_ops) { - custom_code = string(::tflite::kEagerCustomCodePrefix) + + if (allow_flex_ops) { + custom_code = string(::tflite::kFlexCustomCodePrefix) + unsupported_op.tensorflow_op; } else { custom_code = unsupported_op.tensorflow_op; @@ -101,11 +101,11 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map) { void LoadOperatorsMap( const Model& model, OperatorsMap* operators_map, const std::map>& ops_by_type, - bool allow_eager_ops) { + bool allow_flex_ops) { // First find a list of unique operator types. std::set keys; for (const auto& op : model.operators) { - keys.insert(GetOperatorKey(*op, ops_by_type, allow_eager_ops)); + keys.insert(GetOperatorKey(*op, ops_by_type, allow_flex_ops)); } // Now assign indices to them and fill in the map. 
int index = 0; @@ -216,7 +216,7 @@ Offset>> ExportOperatorCodes( for (const auto& op : model.operators) { const details::OperatorKey operator_key = - GetOperatorKey(*op, ops_by_type, params.allow_eager_ops); + GetOperatorKey(*op, ops_by_type, params.allow_flex_ops); int op_index = operators_map.at(operator_key); int op_version = operator_key.version; @@ -281,7 +281,7 @@ Offset>> ExportOperators( } int op_index = operators_map.at( - GetOperatorKey(*op, ops_by_type, params.allow_eager_ops)); + GetOperatorKey(*op, ops_by_type, params.allow_flex_ops)); auto tflite_op_it = ops_by_type.find(op->type); BaseOperator* tflite_op = tflite_op_it == ops_by_type.end() @@ -334,7 +334,7 @@ Offset>> ExportBuffers( void Export(const Model& model, string* output_file_contents, const ExportParams& params) { - const auto ops_by_type = BuildOperatorByTypeMap(params.allow_eager_ops); + const auto ops_by_type = BuildOperatorByTypeMap(params.allow_flex_ops); Export(model, output_file_contents, params, ops_by_type); } @@ -349,7 +349,7 @@ void Export( details::OperatorsMap operators_map; details::LoadOperatorsMap(model, &operators_map, ops_by_type, - params.allow_eager_ops); + params.allow_flex_ops); std::vector buffers_to_write; Array empty_array; diff --git a/tensorflow/contrib/lite/toco/tflite/export.h b/tensorflow/contrib/lite/toco/tflite/export.h index b070a38768..29d6de4049 100644 --- a/tensorflow/contrib/lite/toco/tflite/export.h +++ b/tensorflow/contrib/lite/toco/tflite/export.h @@ -26,7 +26,7 @@ namespace tflite { // The parameters for exporting a TFLite model. 
struct ExportParams { bool allow_custom_ops = false; - bool allow_eager_ops = false; + bool allow_flex_ops = false; bool quantize_weights = false; }; @@ -121,7 +121,7 @@ void LoadTensorsMap(const Model& model, TensorsMap* tensors_map); void LoadOperatorsMap( const Model& model, OperatorsMap* operators_map, const std::map>& ops_by_type, - bool allow_eager_ops); + bool allow_flex_ops); } // namespace details } // namespace tflite diff --git a/tensorflow/contrib/lite/toco/tflite/export_test.cc b/tensorflow/contrib/lite/toco/tflite/export_test.cc index 8d4d197c46..93882a91a7 100644 --- a/tensorflow/contrib/lite/toco/tflite/export_test.cc +++ b/tensorflow/contrib/lite/toco/tflite/export_test.cc @@ -105,7 +105,7 @@ TEST_F(ExportTest, LoadOperatorsMap) { details::OperatorsMap operators; const auto ops_by_type = BuildOperatorByTypeMap(); - // TODO(ycling): Add a test for allow_eager_ops. + // TODO(ycling): Add a test for allow_flex_ops. details::LoadOperatorsMap(input_model_, &operators, ops_by_type, false); EXPECT_EQ(0, operators[details::OperatorKey(OperatorType::kAdd, "", 1)]); EXPECT_EQ(1, operators[details::OperatorKey(OperatorType::kConv, "", 1)]); diff --git a/tensorflow/contrib/lite/toco/tflite/operator.cc b/tensorflow/contrib/lite/toco/tflite/operator.cc index ca2a6a19b3..9addbb81e7 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.cc +++ b/tensorflow/contrib/lite/toco/tflite/operator.cc @@ -1160,8 +1160,8 @@ class Unpack : public BuiltinOperator Deserialize( const BuiltinOptions* builtin_options, const CustomOptions* custom_options) const override { - // Deserializing Eager ops doesn't work now. + // Deserializing Flex ops doesn't work now. // TODO(ycling): Revisit and decide if we should fix the flow for importing - // TFLite models with Eager ops. + // TFLite models with Flex ops. 
auto op = absl::make_unique(); if (custom_options) { auto flexbuffer_map = @@ -1200,13 +1200,13 @@ class TensorFlowUnsupported : public BaseOperator { return std::unique_ptr(); } - if (allow_eager_ops_) { + if (allow_flex_ops_) { fbb->Vector([&]() { fbb->String(node_def.op()); fbb->String(op.tensorflow_node_def); }); fbb->Finish(); - LOG(INFO) << "Writing eager op: " << node_def.op(); + LOG(INFO) << "Writing flex op: " << node_def.op(); return std::unique_ptr(fbb.release()); } @@ -1316,13 +1316,13 @@ class TensorFlowUnsupported : public BaseOperator { } private: - const bool allow_eager_ops_; + const bool allow_flex_ops_; }; namespace { // Build a vector containing all the known operators. std::vector> BuildOperatorList( - bool allow_eager_ops = false) { + bool allow_flex_ops = false) { std::vector> ops; using tensorflow::MakeUnique; // Builtin Operators. @@ -1434,7 +1434,7 @@ std::vector> BuildOperatorList( ops.push_back(MakeUnique( "CTC_BEAM_SEARCH_DECODER", OperatorType::kCTCBeamSearchDecoder)); ops.push_back(MakeUnique( - "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_eager_ops)); + "TENSORFLOW_UNSUPPORTED", OperatorType::kUnsupported, allow_flex_ops)); // There operators are supported by Toco, but not by TF Lite, and has no // attributes. 
@@ -1512,11 +1512,11 @@ std::vector> BuildOperatorList( } // namespace std::map> BuildOperatorByTypeMap( - bool allow_eager_ops) { + bool allow_flex_ops) { std::map> result; std::vector> ops = - BuildOperatorList(allow_eager_ops); + BuildOperatorList(allow_flex_ops); for (auto& op : ops) { result[op->type()] = std::move(op); } @@ -1525,11 +1525,11 @@ std::map> BuildOperatorByTypeMap( } std::map> BuildOperatorByNameMap( - bool allow_eager_ops) { + bool allow_flex_ops) { std::map> result; std::vector> ops = - BuildOperatorList(allow_eager_ops); + BuildOperatorList(allow_flex_ops); for (auto& op : ops) { result[op->name()] = std::move(op); } diff --git a/tensorflow/contrib/lite/toco/tflite/operator.h b/tensorflow/contrib/lite/toco/tflite/operator.h index 702fb28ea6..13d9f6c49a 100644 --- a/tensorflow/contrib/lite/toco/tflite/operator.h +++ b/tensorflow/contrib/lite/toco/tflite/operator.h @@ -26,15 +26,15 @@ namespace tflite { class BaseOperator; // Return a map contained all know TF Lite Operators, keyed by their names. -// TODO(ycling): The pattern to propagate parameters (e.g. allow_eager_ops) +// TODO(ycling): The pattern to propagate parameters (e.g. allow_flex_ops) // is ugly here. Consider refactoring. std::map> BuildOperatorByNameMap( - bool allow_eager_ops = false); + bool allow_flex_ops = false); // Return a map contained all know TF Lite Operators, keyed by the type of // their tf.mini counterparts. std::map> BuildOperatorByTypeMap( - bool allow_eager_ops = false); + bool allow_flex_ops = false); // These are the flatbuffer types for custom and builtin options. using CustomOptions = flatbuffers::Vector; diff --git a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc index b6aebc0470..cff79776bc 100644 --- a/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc +++ b/tensorflow/contrib/lite/toco/toco_cmdline_flags.cc @@ -167,11 +167,11 @@ bool ParseTocoFlagsFromCommandLineFlags( "converted float model. 
Model size will be reduced and there will " "be latency improvements (at the cost of accuracy)."), // WARNING: Experimental interface, subject to change - Flag("allow_eager_ops", parsed_flags.allow_eager_ops.bind(), - parsed_flags.allow_eager_ops.default_value(), ""), + Flag("allow_flex_ops", parsed_flags.allow_flex_ops.bind(), + parsed_flags.allow_flex_ops.default_value(), ""), // WARNING: Experimental interface, subject to change - Flag("force_eager_ops", parsed_flags.force_eager_ops.bind(), - parsed_flags.force_eager_ops.default_value(), "")}; + Flag("force_flex_ops", parsed_flags.force_flex_ops.bind(), + parsed_flags.force_flex_ops.default_value(), "")}; bool asked_for_help = *argc == 2 && (!strcmp(argv[1], "--help") || !strcmp(argv[1], "-help")); if (asked_for_help) { @@ -266,15 +266,15 @@ void ReadTocoFlagsFromCommandLineFlags(const ParsedTocoFlags& parsed_toco_flags, READ_TOCO_FLAG(split_tflite_lstm_inputs, FlagRequirement::kNone); READ_TOCO_FLAG(quantize_weights, FlagRequirement::kNone); READ_TOCO_FLAG(post_training_quantize, FlagRequirement::kNone); - READ_TOCO_FLAG(allow_eager_ops, FlagRequirement::kNone); - READ_TOCO_FLAG(force_eager_ops, FlagRequirement::kNone); + READ_TOCO_FLAG(allow_flex_ops, FlagRequirement::kNone); + READ_TOCO_FLAG(force_flex_ops, FlagRequirement::kNone); - if (parsed_toco_flags.force_eager_ops.value() && - !parsed_toco_flags.allow_eager_ops.value()) { - // TODO(ycling): Consider to enforce `allow_eager_ops` when - // `force_eager_ops` is true. - LOG(WARNING) << "--force_eager_ops should always be used with " - "--allow_eager_ops."; + if (parsed_toco_flags.force_flex_ops.value() && + !parsed_toco_flags.allow_flex_ops.value()) { + // TODO(ycling): Consider to enforce `allow_flex_ops` when + // `force_flex_ops` is true. + LOG(WARNING) << "--force_flex_ops should always be used with " + "--allow_flex_ops."; } // Deprecated flag handling. 
diff --git a/tensorflow/contrib/lite/toco/toco_flags.proto b/tensorflow/contrib/lite/toco/toco_flags.proto index 53d60fed05..ca3e64485e 100644 --- a/tensorflow/contrib/lite/toco/toco_flags.proto +++ b/tensorflow/contrib/lite/toco/toco_flags.proto @@ -190,16 +190,16 @@ message TocoFlags { // (at the cost of accuracy). optional bool post_training_quantize = 26 [default = false]; - // When enabled, unsupported ops will be converted to TFLite Eager ops. + // When enabled, unsupported ops will be converted to TFLite Flex ops. // TODO(ycling): Consider to rename the following 2 flags and don't call it - // "Eager". - // `allow_eager_ops` should always be used with `allow_custom_ops`. + // "Flex". + // `allow_flex_ops` should always be used with `allow_custom_ops`. // WARNING: Experimental interface, subject to change - optional bool allow_eager_ops = 27 [default = false]; + optional bool allow_flex_ops = 27 [default = false]; - // When enabled, all TensorFlow ops will be converted to TFLite Eager - // ops directly. This will force `allow_eager_ops` to true. - // `force_eager_ops` should always be used with `allow_eager_ops`. + // When enabled, all TensorFlow ops will be converted to TFLite Flex + // ops directly. This will force `allow_flex_ops` to true. + // `force_flex_ops` should always be used with `allow_flex_ops`. 
// WARNING: Experimental interface, subject to change - optional bool force_eager_ops = 28 [default = false]; + optional bool force_flex_ops = 28 [default = false]; } diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index a08b02485f..106494f354 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -198,7 +198,7 @@ std::unique_ptr Import(const TocoFlags& toco_flags, : (toco_flags.output_format() != TENSORFLOW_GRAPHDEF); tf_import_flags.import_all_ops_as_unsupported = - toco_flags.force_eager_ops(); + toco_flags.force_flex_ops(); model = ImportTensorFlowGraphDef(model_flags, tf_import_flags, input_file_contents); @@ -409,9 +409,9 @@ void Export(const TocoFlags& toco_flags, const Model& model, case TFLITE: { toco::tflite::ExportParams params; - // Always allow custom ops when eager ops are allowed. - if (toco_flags.force_eager_ops() || toco_flags.allow_eager_ops()) { - params.allow_eager_ops = true; + // Always allow custom ops when flex ops are allowed. 
+ if (toco_flags.force_flex_ops() || toco_flags.allow_flex_ops()) { + params.allow_flex_ops = true; params.allow_custom_ops = true; } else if (allow_custom_ops) { params.allow_custom_ops = true; diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD index dc97d22401..bc18d40313 100644 --- a/tensorflow/contrib/lite/tools/benchmark/BUILD +++ b/tensorflow/contrib/lite/tools/benchmark/BUILD @@ -36,7 +36,7 @@ cc_binary( ) cc_binary( - name = "benchmark_model_plus_eager", + name = "benchmark_model_plus_flex", srcs = [ "benchmark_main.cc", ], @@ -49,7 +49,7 @@ cc_binary( "//conditions:default": [], }), deps = [ - ":benchmark_tflite_model_plus_eager_lib", + ":benchmark_tflite_model_plus_flex_lib", ":logging", ], ) @@ -111,7 +111,7 @@ cc_library( ) cc_library( - name = "benchmark_tflite_model_plus_eager_lib", + name = "benchmark_tflite_model_plus_flex_lib", srcs = [ "benchmark_tflite_model.cc", "logging.h", @@ -123,7 +123,7 @@ cc_library( ":logging", "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:string_util", - "//tensorflow/contrib/lite/delegates/eager:delegate", + "//tensorflow/contrib/lite/delegates/flex:delegate", "//tensorflow/contrib/lite/kernels:builtin_ops", "//tensorflow/contrib/lite/profiling:profile_summarizer", ], diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc index ef4f0fa80d..d989ee720d 100644 --- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc @@ -24,7 +24,7 @@ limitations under the License. 
#include #ifdef TFLITE_EXTENDED -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #endif // TFLITE_EXTENDED #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/model.h" @@ -306,8 +306,8 @@ void BenchmarkTfLiteModel::Init() { interpreter->UseNNAPI(use_nnapi); #ifdef TFLITE_EXTENDED - TFLITE_LOG(INFO) << "Instantiating Eager Delegate"; - delegate_ = EagerDelegate::Create(); + TFLITE_LOG(INFO) << "Instantiating Flex Delegate"; + delegate_ = FlexDelegate::Create(); if (delegate_) { interpreter->ModifyGraphWithDelegate(delegate_.get(), /*allow_dynamic_tensors=*/true); diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h index 8541512bc8..9343824b4a 100644 --- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h @@ -21,7 +21,7 @@ limitations under the License. #include #ifdef TFLITE_EXTENDED -#include "tensorflow/contrib/lite/delegates/eager/delegate.h" +#include "tensorflow/contrib/lite/delegates/flex/delegate.h" #endif // TFLITE_EXTENDED #include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/profiling/profile_summarizer.h" @@ -74,7 +74,7 @@ class BenchmarkTfLiteModel : public BenchmarkModel { private: #ifdef TFLITE_EXTENDED - std::unique_ptr delegate_; + std::unique_ptr delegate_; #endif // TFLITE_EXTENDED std::unique_ptr model; std::unique_ptr interpreter; diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc index 7950653da9..6aa35b5227 100644 --- a/tensorflow/contrib/lite/util.cc +++ b/tensorflow/contrib/lite/util.cc @@ -18,9 +18,9 @@ limitations under the License. 
namespace tflite { -bool IsEagerOp(const char* custom_name) { - return custom_name && strncmp(custom_name, kEagerCustomCodePrefix, - strlen(kEagerCustomCodePrefix)) == 0; +bool IsFlexOp(const char* custom_name) { + return custom_name && strncmp(custom_name, kFlexCustomCodePrefix, + strlen(kFlexCustomCodePrefix)) == 0; } TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h index 6d81f844f8..31292a6f81 100644 --- a/tensorflow/contrib/lite/util.h +++ b/tensorflow/contrib/lite/util.h @@ -26,15 +26,15 @@ limitations under the License. namespace tflite { -// The prefix of Eager op custom code. +// The prefix of Flex op custom code. // This will be matched agains the `custom_code` field in `OperatorCode` // Flatbuffer Table. // WARNING: This is an experimental API and subject to change. -constexpr char kEagerCustomCodePrefix[] = "Eager"; +constexpr char kFlexCustomCodePrefix[] = "Flex"; // Checks whether the prefix of the custom name indicates the operation is an -// Eager operation. -bool IsEagerOp(const char* custom_name); +// Flex operation. +bool IsFlexOp(const char* custom_name); // Converts a `std::vector` to a `TfLiteIntArray`. The caller takes ownership // of the returned pointer. 
diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc index c5c1709f1d..25f3aded71 100644 --- a/tensorflow/contrib/lite/util_test.cc +++ b/tensorflow/contrib/lite/util_test.cc @@ -41,14 +41,14 @@ TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) { TfLiteIntArrayFree(output); } -TEST(UtilTest, IsEagerOp) { - EXPECT_TRUE(IsEagerOp("Eager")); - EXPECT_TRUE(IsEagerOp("EagerOp")); - EXPECT_FALSE(IsEagerOp("eager")); - EXPECT_FALSE(IsEagerOp("Eage")); - EXPECT_FALSE(IsEagerOp("OpEager")); - EXPECT_FALSE(IsEagerOp(nullptr)); - EXPECT_FALSE(IsEagerOp("")); +TEST(UtilTest, IsFlexOp) { + EXPECT_TRUE(IsFlexOp("Flex")); + EXPECT_TRUE(IsFlexOp("FlexOp")); + EXPECT_FALSE(IsFlexOp("flex")); + EXPECT_FALSE(IsFlexOp("Fle")); + EXPECT_FALSE(IsFlexOp("OpFlex")); + EXPECT_FALSE(IsFlexOp(nullptr)); + EXPECT_FALSE(IsFlexOp("")); } } // namespace -- GitLab From 8276ef6088ecedd4a5f62a8eacd35a075a43746c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 15:07:12 -0700 Subject: [PATCH 098/570] Updates Interpreter to be initialized with a MappedByteBuffer for backward compatibility. 
PiperOrigin-RevId: 214843130 --- .../java/org/tensorflow/lite/Interpreter.java | 15 +++++++++++++++ .../java/org/tensorflow/lite/InterpreterTest.java | 4 ++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index eacfa0c827..5cc6e754f3 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -17,6 +17,7 @@ package org.tensorflow.lite; import java.io.File; import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; import java.util.HashMap; import java.util.Map; import org.checkerframework.checker.nullness.qual.NonNull; @@ -148,6 +149,20 @@ public final class Interpreter implements AutoCloseable { this(byteBuffer, new Options().setNumThreads(numThreads)); } + /** + * Initializes a {@code Interpreter} with a {@code MappedByteBuffer} to the model file. + * + *

The {@code MappedByteBuffer} should remain unchanged after the construction of a {@code + * Interpreter}. + * + * @deprecated Prefer using the {@link #Interpreter(ByteBuffer,Options)} constructor. This method + * will be removed in a future release. + */ + @Deprecated + public Interpreter(@NonNull MappedByteBuffer mappedByteBuffer) { + this(mappedByteBuffer, /* options= */ null); + } + /** * Initializes a {@code Interpreter} with a {@code ByteBuffer} of a model file and a set of custom * {@link #Options}. diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index fdd5063156..a98fca0132 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -71,7 +71,7 @@ public final class InterpreterTest { Path path = MODEL_FILE.toPath(); FileChannel fileChannel = (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); - MappedByteBuffer mappedByteBuffer = + ByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); Interpreter interpreter = new Interpreter(mappedByteBuffer); float[] oneD = {1.23f, 6.54f, 7.81f}; @@ -118,7 +118,7 @@ public final class InterpreterTest { byteBuffer.order(ByteOrder.nativeOrder()); fileChannel.read(byteBuffer); try { - Interpreter interpreter = new Interpreter(byteBuffer); + new Interpreter(byteBuffer); fail(); } catch (IllegalArgumentException e) { assertThat(e) -- GitLab From 17320a0543de32715159a732be065a55a3d990db Mon Sep 17 00:00:00 2001 From: Russell Power Date: Thu, 27 Sep 2018 15:28:50 -0700 Subject: [PATCH 099/570] Fix heartbeat probing. 
PiperOrigin-RevId: 214846488 --- .../contrib/tpu/python/tpu/session_support.py | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/session_support.py b/tensorflow/contrib/tpu/python/tpu/session_support.py index 24b9bd136b..05264f5a46 100644 --- a/tensorflow/contrib/tpu/python/tpu/session_support.py +++ b/tensorflow/contrib/tpu/python/tpu/session_support.py @@ -44,21 +44,25 @@ class CoordinatorShutdownException(Exception): def _make_heartbeat_op(session, device, request_ph): """Return a heartbeat op or None if heartbeats are not supported by device.""" try: - with ops.device(device): - heartbeat_op = tpu_ops.worker_heartbeat(request_ph) - request = event_pb2.WorkerHeartbeatRequest() - options = config_pb2.RunOptions(timeout_in_ms=5000) - session.run( - heartbeat_op, - feed_dict={request_ph: request.SerializeToString()}, - options=options) - return heartbeat_op + # Test if we can connect in a isolated graph + session + with ops.Graph().as_default(): + with session_lib.Session(target=session.sess_str) as temp_session: + with ops.device(device): + heartbeat_op = tpu_ops.worker_heartbeat('') + options = config_pb2.RunOptions(timeout_in_ms=5000) + temp_session.run(heartbeat_op, options=options) except errors.InvalidArgumentError as _: + logging.warning('Error running heartbeat on %s', device) return None except errors.DeadlineExceededError as _: logging.warning('Timeout connecting to %s when testing heartbeat', device) return None + # If we successfully connected and pinged the worker, go ahead and construct + # the operation. 
+ with ops.device(device): + return tpu_ops.worker_heartbeat(request_ph) + class WorkerHeartbeatManager(object): """Manages the status/heartbeat monitor for a set of workers.""" @@ -171,7 +175,7 @@ class WorkerHeartbeatManager(object): def all_worker_devices(session): """Return a list of devices for each worker in the system.""" devices = session.list_devices() - return [device.name for device in devices if 'CPU' in device.name] + return [device.name for device in devices if ':CPU:' in device.name] class WatchdogManager(threading.Thread): -- GitLab From a3291ab1f2cb9ea2c4e4b3b9b26ad1a1866dfc50 Mon Sep 17 00:00:00 2001 From: Scott Zhu Date: Thu, 27 Sep 2018 15:32:00 -0700 Subject: [PATCH 100/570] Update function registration with both inference function and forward/backward function pair. PiperOrigin-RevId: 214847027 --- tensorflow/python/eager/function.py | 21 ++++++++++++-- tensorflow/python/eager/function_test.py | 37 +++++++++++++++--------- 2 files changed, 43 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b28befeb62..dd3e1a3723 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -1328,8 +1328,25 @@ def register(func, *args, **kwargs): "Got type: %s" % type(func)) concrete_func = func.get_concrete_function(*args, **kwargs) graph = ops.get_default_graph() - concrete_func._inference_function.add_to_graph(graph) # pylint: disable=protected-access - # TODO(scottzhu): support concrete_func._backward_graph_function in future. + + # There are two situations for the actual call of a defun: + # 1. If none of the input args are resource variables or watch by any tape, + # it will run the _inference_function of concrete_func for forward pass, and + # the gradient will be generated by standard mechanism. + # 2. Otherwise, defun will create two functions, one for forward pass, and the + # backward pass will be created via tape. 
+ # When registering the function, we put both cases into graph. + # pylint: disable=protected-access + concrete_func._inference_function.add_to_graph(graph) + + if concrete_func._backward_graph_function is None: + concrete_func._construct_backprop_function() + forward_function = concrete_func._forward_function + backward_function = concrete_func._backward_graph_function._inference_function + forward_function.add_to_graph(graph) + backward_function.add_to_graph(graph) + # pylint: enable=protected-access + return concrete_func diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 59faf967c5..34a2648e26 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -1669,12 +1669,23 @@ class FunctionTest(test.TestCase): graph = ops.get_default_graph() # pylint: disable=protected-access - self.assertEqual(len(graph._functions), 2) + self.assertEqual(len(graph._functions), 6) + # two sets of functions, each of them are (inference, forward, backward) functions = list(graph._functions.values()) - pre_register_matmul_func_name = functions[0].definition.signature.name - self.assertRegexpMatches(pre_register_matmul_func_name, '.*matmul.*') - pre_register_add_func_name = functions[1].definition.signature.name - self.assertRegexpMatches(pre_register_add_func_name, '.*add.*') + captured_function_names = [ + f.definition.signature.name for f in functions + ] + expected_func_name_regex = [ + '.*inference.*matmul.*', + '.*forward.*matmul.*', + '.*inference.*backward.*matmul.*', + '.*inference.*add.*', + '.*forward.*add.*', + '.*inference.*backward.*add.*', + ] + for i in range(len(functions)): + self.assertRegexpMatches(captured_function_names[i], + expected_func_name_regex[i]) sq = defun_matmul(t, t) double = add(t, t) @@ -1682,12 +1693,11 @@ class FunctionTest(test.TestCase): self.assertAllEqual(double.eval().reshape(-1), [2, 4, 6, 8]) # Make sure the pre registered function is used, and 
no other function # is added. - self.assertEqual(len(graph._functions), 2) + self.assertEqual(len(graph._functions), 6) functions = list(graph._functions.values()) - called_func_name = functions[0].definition.signature.name - self.assertEqual(pre_register_matmul_func_name, called_func_name) - called_func_name = functions[1].definition.signature.name - self.assertEqual(pre_register_add_func_name, called_func_name) + for i in range(len(functions)): + self.assertEquals(captured_function_names[i], + functions[i].definition.signature.name) def testRegisterFunctionWithInputSignature(self): def matmul(x, y): @@ -1705,7 +1715,7 @@ class FunctionTest(test.TestCase): graph = ops.get_default_graph() # pylint: disable=protected-access - self.assertEqual(len(graph._functions), 1) + self.assertEqual(len(graph._functions), 3) # Test input param shape mismatch t2 = constant_op.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) @@ -1728,7 +1738,7 @@ class FunctionTest(test.TestCase): graph = ops.get_default_graph() # Only one function is registered since the input param are in same type # pylint: disable=protected-access - self.assertEqual(len(graph._functions), 1) + self.assertEqual(len(graph._functions), 3) def testCallingFunctionWithDifferentVariables(self): @@ -1767,7 +1777,8 @@ class FunctionTest(test.TestCase): 'be Tensors;.*'): graph_function('Not a Tensor.') - def testSwapImplementationWithGrapplerPlugin(self): + # TODO(scottzhu): Revive the test once the grappler plugin is updated. + def disabled_testSwapImplementationWithGrapplerPlugin(self): rewrites = rewriter_config_pb2.RewriterConfig() # function_optimizer has to be turn off, otherwise it will delete the # registered function if it does not get called. -- GitLab From bdab0b3c111bbe1c9656fa2228f1a4d28df5a7bf Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Thu, 27 Sep 2018 15:32:38 -0700 Subject: [PATCH 101/570] Added an experimental API for user to set an internal error status. 
See https://github.com/apple/swift/pull/19588/files#diff-923cd5ac82727b31d446c23641b3d749 for an example usage. Also removed an experimental API that's no longer needed. PiperOrigin-RevId: 214847132 --- tensorflow/c/c_api_experimental.cc | 34 +++--------------------------- tensorflow/c/c_api_experimental.h | 6 ++---- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc index f316e4ba67..d4b78138e9 100644 --- a/tensorflow/c/c_api_experimental.cc +++ b/tensorflow/c/c_api_experimental.cc @@ -8738,35 +8738,7 @@ void TFE_TensorHandlePrintDebugString(TFE_TensorHandle* handle) { TF_DeleteStatus(status); } -TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx) { - // Intentionally LOG into INFO below for ease of debugging. - VLOG(1) << "TFE_RunConstOp called"; - - auto* status = TF_NewStatus(); - auto* op = TFE_NewOp(ctx, "Const", status); - CheckOk(status); - TFE_OpSetAttrType(op, "dtype", TF_FLOAT); - - auto* tensor = - TF_AllocateTensor(TF_FLOAT, /*shape.data()*/ nullptr, /*shape.size()*/ 0, - TF_DataTypeSize(TF_FLOAT) * 1); - auto* ptr = reinterpret_cast(TF_TensorData(tensor)); - *reinterpret_cast(ptr) = 17.0; - - TFE_OpSetAttrTensor(op, "value", tensor, status); - CheckOk(status); - TF_DeleteTensor(tensor); - VLOG(1) << "New op created"; - - TFE_TensorHandle* retval; - int num_retvals = 1; - TFE_Execute(op, &retval, &num_retvals, status); - CheckOk(status); - CHECK_EQ(num_retvals, 1); - VLOG(1) << "Op executed"; - - TFE_DeleteOp(op); - TF_DeleteStatus(status); - - return retval; +TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status, + const char* errMsg) { + status->status = tensorflow::errors::Internal(errMsg); } diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h index 950ad9aeed..d98d532e32 100644 --- a/tensorflow/c/c_api_experimental.h +++ b/tensorflow/c/c_api_experimental.h @@ -180,10 +180,8 @@ TF_CAPI_EXPORT extern TFE_TensorHandle* 
TFE_DequeueVariantTensor( TF_CAPI_EXPORT extern void TFE_TensorHandlePrintDebugString( TFE_TensorHandle* handle); -// Returns a const scalar tensor. -// Caller owns both the input and the output tensor handles. -// TODO: Remove this API with hard-coded tensor computation. -TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_RunConstOp(TFE_Context* ctx); +TF_CAPI_EXPORT extern void TF_MakeInternalErrorStatus(TF_Status* status, + const char* errMsg); #ifdef __cplusplus } /* end extern "C" */ -- GitLab From 8f85a9de475f0acf0abef4fabc12943e2e487bf7 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 27 Sep 2018 15:37:49 -0700 Subject: [PATCH 102/570] Do not specify dilation rate to depthwise conv2d. PiperOrigin-RevId: 214848057 --- tensorflow/contrib/quantize/python/fold_batch_norms.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index e5790a6e13..7575b1b6cd 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -418,10 +418,11 @@ def _CloneWithNewOperands(layer_op, input_tensor, weight_tensor, transpose_b=layer_op.get_attr('transpose_b'), name=new_layer_name) elif layer_op.type == 'DepthwiseConv2dNative': + # We don't copy dilation rate because we reuse the input SpaceToBatch + # and create our own BatchToSpace operation below. 
conv = nn.depthwise_conv2d( input_tensor, weight_tensor, - rate=layer_op.get_attr('dilations'), strides=layer_op.get_attr('strides'), padding=layer_op.get_attr('padding'), name=new_layer_name) -- GitLab From bfec3d54fed955a4b145220e64c48b94fbb04ae7 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Thu, 27 Sep 2018 15:38:48 -0700 Subject: [PATCH 103/570] [XLA] Use a result cache to speed up InstructionFusion::CanFuseOnAllPaths() PiperOrigin-RevId: 214848216 --- .../xla/service/instruction_fusion.cc | 29 ++++++++++++++----- .../compiler/xla/service/instruction_fusion.h | 11 +++++-- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index 3fdc2cee9a..e884122fcb 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -188,13 +188,20 @@ bool InstructionFusion::EffectivelyAtMostUnary(HloInstruction* hlo) { bool InstructionFusion::CanFuseOnAllPaths( HloInstruction* producer, HloInstruction* consumer, - const HloInstructionSet& do_not_duplicate) { + const HloInstructionSet& do_not_fuse, + tensorflow::gtl::FlatMap, bool>* + result_cache) { if (consumer == producer) { return true; } if (!consumer->IsFusible()) { return false; } + auto cache_it = result_cache->find(std::make_pair(producer, consumer)); + if (cache_it != result_cache->end()) { + return cache_it->second; + } + bool result = true; for (int64 i = 0, e = consumer->operand_count(); i < e; ++i) { auto* consumer_operand = consumer->mutable_operand(i); // If the operand is not on a path to the producer, it doesn't matter @@ -202,20 +209,23 @@ bool InstructionFusion::CanFuseOnAllPaths( if (!reachability_->IsReachable(producer, consumer_operand)) { continue; } - if (do_not_duplicate.count(consumer_operand) > 0 || - !ShouldFuse(consumer, i)) { - return false; + if (do_not_fuse.count(consumer_operand) > 0 || !ShouldFuse(consumer, 
i)) { + result = false; + break; } // The producer is reachable from consumer_operand which means we need // to be able to fuse consumer_operand into consumer in order for // producer to be fusible into consumer on all paths. // Perform the recursive step: make sure producer can be fused into // consumer_operand on all paths. - if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_duplicate)) { - return false; + if (!CanFuseOnAllPaths(producer, consumer_operand, do_not_fuse, + result_cache)) { + result = false; + break; } } - return true; + result_cache->emplace(std::make_pair(producer, consumer), result); + return result; } InstructionFusion::HloInstructionSet @@ -231,6 +241,8 @@ InstructionFusion::ComputeGloballyUnfusible( // fusing operations that require duplication later depending on // is_expensive_(). HloInstructionSet do_not_duplicate; + tensorflow::gtl::FlatMap, bool> + can_fuse_on_all_paths_result_cache; for (HloInstruction* consumer : post_order) { for (HloInstruction* producer : consumer->operands()) { if (do_not_duplicate.count(producer) > 0) { @@ -286,7 +298,8 @@ InstructionFusion::ComputeGloballyUnfusible( // A will be not allowed to be fused into B, as it cannot be fused via // all paths. if (producer->IsFusible() && - CanFuseOnAllPaths(producer, consumer, do_not_duplicate)) { + CanFuseOnAllPaths(producer, consumer, do_not_duplicate, + &can_fuse_on_all_paths_result_cache)) { continue; } do_not_duplicate.insert(producer); diff --git a/tensorflow/compiler/xla/service/instruction_fusion.h b/tensorflow/compiler/xla/service/instruction_fusion.h index 7e1196fb7f..c1ec3b18a1 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.h +++ b/tensorflow/compiler/xla/service/instruction_fusion.h @@ -151,8 +151,15 @@ class InstructionFusion : public HloModulePass { // Whether or not we can fuse producer into consumer on all paths // from the producer to the consumer where nodes are HLOs and edges are uses. 
- bool CanFuseOnAllPaths(HloInstruction* producer, HloInstruction* consumer, - const HloInstructionSet& do_not_fuse); + // + // A map from to a bool is required as the result cache + // to store and query the results of calls to this function, in order to avoid + // repeated computations. + bool CanFuseOnAllPaths( + HloInstruction* producer, HloInstruction* consumer, + const HloInstructionSet& do_not_fuse, + tensorflow::gtl::FlatMap, + bool>* result_cache); // Computes the set of nodes that we do not want to fuse into any of their // consumers based on a global analysis of the HLO graph. -- GitLab From b56164c72b8f123bfc675f930111af8801fe034f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 15:49:53 -0700 Subject: [PATCH 104/570] Automated rollback of commit 425e96f3ae4eb338268e3738260f9d79e4bdd893. Revert #20539. PiperOrigin-RevId: 214849875 --- tensorflow/contrib/layers/python/layers/embedding_ops.py | 8 +++----- tensorflow/python/feature_column/feature_column_v2.py | 8 +++----- tensorflow/python/ops/embedding_ops.py | 8 +++----- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index 17ee8c0733..60e1d85ea9 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -112,11 +112,9 @@ def safe_embedding_lookup_sparse(embedding_weights, dtype = sparse_weights.dtype if sparse_weights is not None else None if isinstance(embedding_weights, variables.PartitionedVariable): embedding_weights = list(embedding_weights) - if not isinstance(embedding_weights[0], - resource_variable_ops.ResourceVariable): - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] 
contrib_tensor_util.assert_same_float_dtype(embedding_weights + [sparse_weights]) diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index b62c16ea5a..289f6d0d14 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -3447,11 +3447,9 @@ def _safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - if not isinstance(embedding_weights[0], - resource_variable_ops.ResourceVariable): - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 60d73a1693..6263041b8d 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -550,11 +550,9 @@ def safe_embedding_lookup_sparse(embedding_weights, raise ValueError('Missing embedding_weights %s.' % embedding_weights) dtype = sparse_weights.dtype if sparse_weights is not None else None - if not isinstance(embedding_weights[0], - resource_variable_ops.ResourceVariable): - embedding_weights = [ - ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights - ] + embedding_weights = [ + ops.convert_to_tensor(w, dtype=dtype) for w in embedding_weights + ] with ops.name_scope(name, 'embedding_lookup', embedding_weights + [sparse_ids, -- GitLab From b8c86c3bbd8271ed968087f24e7fb704103bc733 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 15:50:41 -0700 Subject: [PATCH 105/570] Support saving/restoring of string tensors with lengths greater than 2^32. 
PiperOrigin-RevId: 214849978 --- tensorflow/core/util/tensor_bundle/BUILD | 1 + .../core/util/tensor_bundle/tensor_bundle.cc | 52 +++++++++----- .../util/tensor_bundle/tensor_bundle_test.cc | 64 +++++++++++++++++- .../testdata/old_string_tensors/README | 3 + .../foo.data-00000-of-00001 | Bin 0 -> 1080 bytes .../testdata/old_string_tensors/foo.index | Bin 0 -> 211 bytes 6 files changed, 100 insertions(+), 20 deletions(-) create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001 create mode 100644 tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.index diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD index 648358606c..4d4db86df2 100644 --- a/tensorflow/core/util/tensor_bundle/BUILD +++ b/tensorflow/core/util/tensor_bundle/BUILD @@ -64,6 +64,7 @@ cc_library( tf_cc_test( name = "tensor_bundle_test", srcs = ["tensor_bundle_test.cc"], + data = glob(["testdata/**"]), deps = [ ":tensor_bundle", "//tensorflow/core:framework", diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index ea8a259d1a..2dcb57a1f9 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -64,27 +64,36 @@ namespace { // Reads "num_elements" string elements from file[offset, offset+size) into the // length-N "destination". Discards the original content of "destination". // -// Checksums the string lengths (as restored uint32, not varint32 bytes) and -// string bytes, and stores it into "actual_crc32c". +// Checksums the string lengths (as restored uint32 or uint64, not varint64 +// bytes) and string bytes, and stores it into "actual_crc32c". 
Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements, size_t offset, size_t size, string* destination, uint32* actual_crc32c) { if (size == 0) return Status::OK(); CHECK_GT(size, 0); - // Reads "num_elements" varint32's from "buffered_file". + // Reads "num_elements" varint64's from "buffered_file". TF_RETURN_IF_ERROR(buffered_file->Seek(offset)); - std::vector string_lengths(num_elements); + std::vector string_lengths(num_elements); for (size_t i = 0; i < num_elements; ++i) { - TF_RETURN_IF_ERROR(buffered_file->ReadVarint32(&string_lengths[i])); + TF_RETURN_IF_ERROR(buffered_file->ReadVarint64(&string_lengths[i])); + if (string_lengths[i] <= UINT32_MAX) { + // We need to do this because older checkpoints only used uint32s and we + // should still support them. + const uint32 elem_size_uint32 = static_cast(string_lengths[i]); + *actual_crc32c = crc32c::Extend( + *actual_crc32c, reinterpret_cast(&elem_size_uint32), + sizeof(uint32)); + } else { + *actual_crc32c = crc32c::Extend( + *actual_crc32c, reinterpret_cast(&string_lengths[i]), + sizeof(uint64)); + } } if (offset + size < buffered_file->Tell()) { return errors::DataLoss("String lengths longer than expected offset ", offset + size); } - *actual_crc32c = - crc32c::Value(reinterpret_cast(string_lengths.data()), - sizeof(uint32) * num_elements); // Reads the length-checksum. uint32 length_checksum = 0; @@ -104,7 +113,7 @@ Status ReadStringTensor(io::InputBuffer* buffered_file, size_t num_elements, // Reads the actual string bytes. 
for (size_t i = 0; i < num_elements; ++i) { - const uint32 string_length = string_lengths[i]; + const uint64 string_length = string_lengths[i]; string* buffer = &destination[i]; buffer->resize(string_length); @@ -218,8 +227,8 @@ Status WriteTensor(const Tensor& val, FileOutputBuffer* out, Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out, size_t* bytes_written, uint32* crc32c) { // On-disk format: - // [varint32 len0]..[varint32 lenL][4 byte cksum on lengths][string bytes] - // Var "crc32c" checksums the string lengths (as uint32, not varint32 bytes), + // [varint64 len0]..[varint64 lenL][4 byte cksum on lengths][string bytes] + // Var "crc32c" checksums the string lengths (as uint64, not varint64 bytes), // the length-checksum, and all the string bytes. DCHECK_EQ(val.dtype(), DT_STRING); const string* strings = GetStringBackingBuffer(val); @@ -230,12 +239,21 @@ Status WriteStringTensor(const Tensor& val, FileOutputBuffer* out, *crc32c = 0; for (int64 i = 0; i < val.NumElements(); ++i) { const string* elem = &strings[i]; - DCHECK_EQ(elem->size(), static_cast(elem->size())); - const uint32 elem_size = static_cast(elem->size()); - - core::PutVarint32(&lengths, elem_size); - *crc32c = crc32c::Extend(*crc32c, reinterpret_cast(&elem_size), - sizeof(uint32)); + DCHECK_EQ(elem->size(), static_cast(elem->size())); + const uint64 elem_size = static_cast(elem->size()); + + core::PutVarint64(&lengths, elem_size); + if (elem_size <= UINT32_MAX) { + // We need to do this because older checkpoints only used uint32s and we + // should still support them. 
+ const uint32 elem_size_uint32 = static_cast(elem_size); + *crc32c = crc32c::Extend(*crc32c, + reinterpret_cast(&elem_size_uint32), + sizeof(uint32)); + } else { + *crc32c = crc32c::Extend( + *crc32c, reinterpret_cast(&elem_size), sizeof(uint64)); + } } TF_RETURN_IF_ERROR(out->Append(lengths)); *bytes_written = lengths.size(); diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc index 59c42baa06..9567e4750b 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle_test.cc @@ -39,6 +39,11 @@ string Prefix(const string& prefix) { return strings::StrCat(testing::TmpDir(), "/", prefix); } +string TestdataPrefix(const string& prefix) { + return strings::StrCat(testing::TensorFlowSrcRoot(), + "/core/util/tensor_bundle/testdata/", prefix); +} + template Tensor Constant(T v, TensorShape shape) { Tensor ret(DataTypeToEnum::value, shape); @@ -458,7 +463,26 @@ TEST(TensorBundleTest, NonStandardShapes) { TestNonStandardShapes(); } +TEST(TensorBundleTest, StringTensorsOldFormat) { + // Test string tensor bundle made with previous version of code that use + // varint32s to store string lengths (we now use varint64s). 
+ BundleReader reader(Env::Default(), TestdataPrefix("old_string_tensors/foo")); + TF_ASSERT_OK(reader.status()); + EXPECT_EQ(AllTensorKeys(&reader), + std::vector({"floats", "scalar", "string_tensor", "strs"})); + + Expect(&reader, "string_tensor", Tensor(DT_STRING, TensorShape({1}))); + Expect(&reader, "scalar", test::AsTensor({"hello"})); + Expect( + &reader, "strs", + test::AsTensor({"hello", "", "x01", string(1 << 10, 'c')})); + Expect(&reader, "floats", Constant_2x3(16.18)); +} + TEST(TensorBundleTest, StringTensors) { + constexpr size_t kLongLength = static_cast(UINT32_MAX) + 1; + Tensor long_string_tensor(DT_STRING, TensorShape({1})); + { BundleWriter writer(Env::Default(), Prefix("foo")); TF_EXPECT_OK(writer.Add("string_tensor", @@ -467,6 +491,12 @@ TEST(TensorBundleTest, StringTensors) { TF_EXPECT_OK(writer.Add( "strs", test::AsTensor({"hello", "", "x01", string(1 << 25, 'c')}))); + + // Requires a 64-bit length. + string* backing_string = long_string_tensor.flat().data(); + backing_string->assign(kLongLength, 'd'); + TF_EXPECT_OK(writer.Add("long_scalar", long_string_tensor)); + // Mixes in some floats. 
TF_EXPECT_OK(writer.Add("floats", Constant_2x3(16.18))); TF_ASSERT_OK(writer.Finish()); @@ -474,9 +504,9 @@ TEST(TensorBundleTest, StringTensors) { { BundleReader reader(Env::Default(), Prefix("foo")); TF_ASSERT_OK(reader.status()); - EXPECT_EQ( - AllTensorKeys(&reader), - std::vector({"floats", "scalar", "string_tensor", "strs"})); + EXPECT_EQ(AllTensorKeys(&reader), + std::vector({"floats", "long_scalar", "scalar", + "string_tensor", "strs"})); Expect(&reader, "string_tensor", Tensor(DT_STRING, TensorShape({1}))); @@ -484,7 +514,35 @@ TEST(TensorBundleTest, StringTensors) { Expect( &reader, "strs", test::AsTensor({"hello", "", "x01", string(1 << 25, 'c')})); + Expect(&reader, "floats", Constant_2x3(16.18)); + + // We don't use the Expect function so we can re-use the + // `long_string_tensor` buffer for reading out long_scalar to keep memory + // usage reasonable. + EXPECT_TRUE(reader.Contains("long_scalar")); + DataType dtype; + TensorShape shape; + TF_ASSERT_OK(reader.LookupDtypeAndShape("long_scalar", &dtype, &shape)); + EXPECT_EQ(DT_STRING, dtype); + EXPECT_EQ(TensorShape({1}), shape); + + // Zero-out the string so that we can be sure the new one is read in. + string* backing_string = long_string_tensor.flat().data(); + backing_string->assign(""); + + // Read long_scalar and check it contains kLongLength 'd's. + TF_ASSERT_OK(reader.Lookup("long_scalar", &long_string_tensor)); + ASSERT_EQ(backing_string, long_string_tensor.flat().data()); + EXPECT_EQ(kLongLength, backing_string->length()); + for (char c : *backing_string) { + // Not using ASSERT_EQ('d', c) because this way is twice as fast due to + // compiler optimizations. 
+ if (c != 'd') { + FAIL() << "long_scalar is not full of 'd's as expected."; + break; + } + } } } diff --git a/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README new file mode 100644 index 0000000000..428d3ef79e --- /dev/null +++ b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/README @@ -0,0 +1,3 @@ +This tensor bundle was generated from cl/214343133, before string tensor +lengths were written as varint64s. This is here to check backwards +compatibility between the new code and old checkpoints. diff --git a/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001 b/tensorflow/core/util/tensor_bundle/testdata/old_string_tensors/foo.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..23b488e5feaefa970927bfd93c4a989fb494fae9 GIT binary patch literal 1080 zcmZQrRxN17dh^&E=Zw^xoP1UW<_3;AL9;J|c@+kR$)jL21V%$(#D>63$yfmMUc^v#0G370dByo~c@0+6z3J@xm>GqNp;}lJ_%$YS zmQ*9-v literal 0 HcmV?d00001 -- GitLab From ece50dd9992ac17e3094c7f6d1914febd7a036b5 Mon Sep 17 00:00:00 2001 From: Jiri Simsa Date: Thu, 27 Sep 2018 16:05:51 -0700 Subject: [PATCH 106/570] [tf.data Introducing tf.data.Dataset.reduce() which reduces elements of a (finite) dataset to a single element. 
PiperOrigin-RevId: 214852364 --- .../base_api/api_def_ReduceDataset.pbtxt | 26 ++++ .../data/group_by_reducer_dataset_op.cc | 4 +- .../data/group_by_window_dataset_op.cc | 4 +- tensorflow/core/kernels/data/iterator_ops.cc | 111 ++++++++++++++++ .../core/kernels/data/scan_dataset_op.cc | 4 +- tensorflow/core/ops/dataset_ops.cc | 13 ++ tensorflow/python/data/kernel_tests/BUILD | 18 +++ .../kernel_tests/reduce_dataset_op_test.py | 124 ++++++++++++++++++ tensorflow/python/data/ops/dataset_ops.py | 120 +++++++++++++++++ .../golden/v1/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + .../golden/v2/tensorflow.data.-dataset.pbtxt | 4 + ...ow.data.-fixed-length-record-dataset.pbtxt | 4 + .../tensorflow.data.-t-f-record-dataset.pbtxt | 4 + .../tensorflow.data.-text-line-dataset.pbtxt | 4 + 17 files changed, 447 insertions(+), 9 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt create mode 100644 tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt new file mode 100644 index 0000000000..08414b3e68 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ReduceDataset.pbtxt @@ -0,0 +1,26 @@ +op { + visibility: HIDDEN + graph_op_name: "ReduceDataset" + in_arg { + name: "input_dataset" + description: <graph_def_version()) { + : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("init_func", &init_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_)); @@ -421,7 +420,6 @@ class GroupByReducerDatasetOp : public UnaryDatasetOpKernel { const std::vector output_shapes_; }; - const int graph_def_version_; DataTypeVector output_types_; std::vector output_shapes_; 
NameAttrList key_func_; diff --git a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc index 8b417bb1c2..14aefe5d54 100644 --- a/tensorflow/core/kernels/data/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/data/group_by_window_dataset_op.cc @@ -31,8 +31,7 @@ namespace { class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { public: explicit GroupByWindowDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx), - graph_def_version_(ctx->graph_def_version()) { + : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("window_size_func", &window_size_func_)); @@ -507,7 +506,6 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { const std::vector output_shapes_; }; - const int graph_def_version_; DataTypeVector output_types_; std::vector output_shapes_; NameAttrList key_func_; diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index c0bc507ec0..7a833668ac 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -659,6 +659,115 @@ class ToSingleElementOp : public AsyncOpKernel { BackgroundWorker background_worker_; }; +class ReduceDatasetOp : public AsyncOpKernel { + public: + explicit ReduceDatasetOp(OpKernelConstruction* ctx) + : AsyncOpKernel(ctx), + background_worker_( + ctx->env(), + strings::StrCat("reduce_thread_", SanitizeThreadSuffix(name()))) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &reduce_func_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("use_inter_op_parallelism", + &use_inter_op_parallelism_)); + } + + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + // The 
call to `iterator->GetNext()` may block and depend on an + // inter-op thread pool thread, so we issue the call from the + // owned thread pool. + background_worker_.Schedule([this, ctx, done]() { + DatasetBase* dataset; + OP_REQUIRES_OK_ASYNC( + ctx, GetDatasetFromVariantTensor(ctx->input(0), &dataset), done); + OpInputList inputs; + OP_REQUIRES_OK_ASYNC(ctx, ctx->input_list("initial_state", &inputs), + done); + std::vector state(inputs.begin(), inputs.end()); + + std::unique_ptr captured_func; + OP_REQUIRES_OK_ASYNC( + ctx, + CapturedFunction::Create(reduce_func_, ctx, "other_arguments", + use_inter_op_parallelism_, &captured_func), + done); + + IteratorContext iter_ctx(ctx); + OP_REQUIRES_OK_ASYNC(ctx, captured_func->Instantiate(&iter_ctx), done); + + std::unique_ptr iterator; + OP_REQUIRES_OK_ASYNC( + ctx, dataset->MakeIterator(&iter_ctx, "ReduceIterator", &iterator), + done); + + // NOTE(jsimsa): We must destroy the iterator before calling `done()`, to + // avoid destruction races. + IteratorBase* raw_iterator = iterator.release(); + auto cleanup = gtl::MakeCleanup([raw_iterator, done] { + delete raw_iterator; + done(); + }); + + // Iterate through the input dataset. + Status status; + while (true) { + std::vector next_input_element; + bool end_of_input; + status = raw_iterator->GetNext(&iter_ctx, &next_input_element, + &end_of_input); + if (!status.ok() || end_of_input) { + break; + } + + // Run the reduce function to update the current state. 
+ std::vector args; + args.reserve(state.size() + next_input_element.size()); + std::copy(state.begin(), state.end(), std::back_inserter(args)); + std::copy(next_input_element.begin(), next_input_element.end(), + std::back_inserter(args)); + + std::vector reduce_func_output; + status = + captured_func->Run(&iter_ctx, std::move(args), &reduce_func_output); + if (!status.ok()) { + break; + } + std::swap(reduce_func_output, state); + } + + if (!status.ok()) { + ctx->SetStatus(status); + return; + } + for (int i = 0; i < state.size(); ++i) { + OP_REQUIRES_ASYNC( + ctx, state[i].dtype() == output_types_[i], + errors::InvalidArgument( + "The result does not match the expected type for component ", i, + ". Expected: ", DataTypeString(output_types_[i]), + ". Actual: ", DataTypeString(state[i].dtype()), "."), + done); + OP_REQUIRES_ASYNC( + ctx, output_shapes_[i].IsCompatibleWith(state[i].shape()), + errors::InvalidArgument( + "The result does not match the expected shape for component ", + i, ". Expected: ", output_shapes_[i].DebugString(), + ". 
Actual: ", state[i].shape().DebugString(), "."), + done); + ctx->set_output(i, state[i]); + } + }); + } + + private: + NameAttrList reduce_func_; + DataTypeVector output_types_; + std::vector output_shapes_; + bool use_inter_op_parallelism_; + BackgroundWorker background_worker_; +}; + class OneShotIteratorOp : public AsyncOpKernel { public: explicit OneShotIteratorOp(OpKernelConstruction* ctx) @@ -1146,6 +1255,8 @@ REGISTER_KERNEL_BUILDER(Name("AnonymousIterator").Device(DEVICE_GPU), AnonymousIteratorHandleOp); REGISTER_KERNEL_BUILDER(Name("DatasetToSingleElement").Device(DEVICE_CPU), ToSingleElementOp); +REGISTER_KERNEL_BUILDER(Name("ReduceDataset").Device(DEVICE_CPU), + ReduceDatasetOp); REGISTER_KERNEL_BUILDER(Name("OneShotIterator").Device(DEVICE_CPU), OneShotIteratorOp); REGISTER_KERNEL_BUILDER(Name("IteratorGetNext").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/data/scan_dataset_op.cc b/tensorflow/core/kernels/data/scan_dataset_op.cc index dbe31f37b8..2a911aa368 100644 --- a/tensorflow/core/kernels/data/scan_dataset_op.cc +++ b/tensorflow/core/kernels/data/scan_dataset_op.cc @@ -32,8 +32,7 @@ namespace { class ScanDatasetOp : public UnaryDatasetOpKernel { public: explicit ScanDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx), - graph_def_version_(ctx->graph_def_version()) { + : UnaryDatasetOpKernel(ctx) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("Tstate", &state_types_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); @@ -258,7 +257,6 @@ class ScanDatasetOp : public UnaryDatasetOpKernel { const std::vector output_shapes_; }; - const int graph_def_version_; DataTypeVector state_types_; DataTypeVector output_types_; std::vector output_shapes_; diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 1ada623cf5..71f4cc3c4c 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -756,6 +756,19 @@ 
REGISTER_OP("DatasetToSingleElement") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(IteratorGetNextShapeFn); +REGISTER_OP("ReduceDataset") + .Input("input_dataset: variant") + .Input("initial_state: Tstate") + .Input("other_arguments: Targuments") + .Output("components: output_types") + .Attr("f: func") + .Attr("Tstate: list(type) >= 1") + .Attr("Targuments: list(type) >= 0") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .Attr("use_inter_op_parallelism: bool = true") + .SetShapeFn(IteratorGetNextShapeFn); + REGISTER_OP("IteratorToStringHandle") .Input("resource_handle: resource") .Output("string_handle: string") diff --git a/tensorflow/python/data/kernel_tests/BUILD b/tensorflow/python/data/kernel_tests/BUILD index fdcbfc3684..5f9818566f 100644 --- a/tensorflow/python/data/kernel_tests/BUILD +++ b/tensorflow/python/data/kernel_tests/BUILD @@ -404,6 +404,24 @@ tf_py_test( ], ) +tf_py_test( + name = "reduce_dataset_op_test", + size = "small", + srcs = ["reduce_dataset_op_test.py"], + additional_deps = [ + ":test_base", + "@absl_py//absl/testing:parameterized", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python/data/ops:dataset_ops", + ], +) + tf_py_test( name = "sequence_dataset_op_test", size = "small", diff --git a/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py b/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py new file mode 100644 index 0000000000..11e07300b9 --- /dev/null +++ b/tensorflow/python/data/kernel_tests/reduce_dataset_op_test.py @@ -0,0 +1,124 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for the experimental input pipeline ops.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized +import numpy as np + +from tensorflow.python.data.kernel_tests import test_base +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class ReduceDatasetTest(test_base.DatasetTestBase, parameterized.TestCase): + + def testSum(self): + for i in range(10): + ds = dataset_ops.Dataset.range(1, i + 1) + result = ds.reduce(np.int64(0), lambda x, y: x + y) + with self.cached_session() as sess: + self.assertEqual(((i + 1) * i) // 2, sess.run(result)) + + def testSumTuple(self): + + def reduce_fn(state, value): + v1, v2 = value + return state + v1 + v2 + + for i in range(10): + ds = dataset_ops.Dataset.range(1, i + 1) + ds = dataset_ops.Dataset.zip((ds, ds)) + result = ds.reduce(np.int64(0), reduce_fn) + with self.cached_session() as sess: + self.assertEqual(((i + 1) * i), sess.run(result)) + + def testSumAndCount(self): + + def reduce_fn(state, value): + s, c = state + return s + value, c + 1 + + for i in range(10): + ds = dataset_ops.Dataset.range(1, i + 1) + result = ds.reduce((np.int64(0), np.int64(0)), reduce_fn) + with 
self.cached_session() as sess: + s, c = sess.run(result) + self.assertEqual(((i + 1) * i) // 2, s) + self.assertEqual(i, c) + + def testSquareUsingPlaceholder(self): + delta = array_ops.placeholder(dtype=dtypes.int64) + + def reduce_fn(state, _): + return state + delta + + for i in range(10): + ds = dataset_ops.Dataset.range(1, i + 1) + result = ds.reduce(np.int64(0), reduce_fn) + with self.cached_session() as sess: + square = sess.run(result, feed_dict={delta: i}) + self.assertEqual(i * i, square) + + def testSparse(self): + + def reduce_fn(_, value): + return value + + def make_sparse_fn(i): + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + + for i in range(10): + ds = dataset_ops.Dataset.from_tensors(make_sparse_fn(i+1)) + result = ds.reduce(make_sparse_fn(0), reduce_fn) + with self.cached_session() as sess: + self.assertSparseValuesEqual(make_sparse_fn(i+1), sess.run(result)) + + def testNested(self): + + def reduce_fn(state, value): + state["dense"] += value["dense"] + state["sparse"] = value["sparse"] + return state + + def make_sparse_fn(i): + return sparse_tensor.SparseTensorValue( + indices=np.array([[0, 0]]), + values=(i * np.array([1])), + dense_shape=np.array([1, 1])) + + def map_fn(i): + return {"dense": math_ops.cast(i, dtype=dtypes.int64), + "sparse": make_sparse_fn(math_ops.cast(i, dtype=dtypes.int64))} + + for i in range(10): + ds = dataset_ops.Dataset.range(1, i + 1).map(map_fn) + result = ds.reduce(map_fn(0), reduce_fn) + with self.cached_session() as sess: + result = sess.run(result) + self.assertEqual(((i + 1) * i) // 2, result["dense"]) + self.assertSparseValuesEqual(make_sparse_fn(i), result["sparse"]) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index ac87a451b1..6bba72a8e9 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ 
b/tensorflow/python/data/ops/dataset_ops.py @@ -1205,6 +1205,126 @@ class Dataset(object): shift = size return WindowDataset(self, size, shift, stride, drop_remainder) + def reduce(self, initial_state, reduce_func): + """Reduces the input dataset to a single element. + + The transformation calls `reduce_func` successively on every element of + the input dataset until the dataset is exhausted, aggregating information in + its internal state. The `initial_state` argument is used for the initial + state and the final state is returned as the result. + + For example: + - `tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, _: x + 1)` + produces `5` + - `tf.data.Dataset.range(5).reduce(np.int64(0), lambda x, y: x + y)` + produces `10` + + Args: + initial_state: A nested structure of tensors, representing the initial + state of the transformation. + reduce_func: A function that maps `(old_state, input_element)` to + `new_state`. It must take two arguments and return a nested structure + of tensors. The structure of `new_state` must match the structure of + `initial_state`. + + Returns: + A nested structure of `tf.Tensor` objects, corresponding to the final + state of the transformation. + + """ + + with ops.name_scope("initial_state"): + # Convert any `SparseTensorValue`s to `SparseTensor`s and all other + # values to tensors. + initial_state = nest.pack_sequence_as(initial_state, [ + sparse_tensor_lib.SparseTensor.from_value(t) + if sparse_tensor_lib.is_sparse(t) else ops.convert_to_tensor( + t, name="component_%d" % i) + for i, t in enumerate(nest.flatten(initial_state)) + ]) + + # Compute initial values for the state classes, shapes and types based on + # the initial state. 
+ state_classes = sparse.get_classes(initial_state) + state_shapes = nest.pack_sequence_as( + initial_state, [t.get_shape() for t in nest.flatten(initial_state)]) + state_types = nest.pack_sequence_as( + initial_state, [t.dtype for t in nest.flatten(initial_state)]) + + # Iteratively rerun the reduce function until reaching a fixed point on + # `self._state_shapes`. + need_to_rerun = True + while need_to_rerun: + + wrapped_func = StructuredFunctionWrapper( + reduce_func, + "reduce()", + input_classes=(state_classes, self.output_classes), + input_shapes=(state_shapes, self.output_shapes), + input_types=(state_types, self.output_types), + add_to_graph=False) + + # Extract and validate class information from the returned values. + output_classes = wrapped_func.output_classes + for new_state_class, state_class in zip( + nest.flatten(output_classes), nest.flatten(state_classes)): + if not issubclass(new_state_class, state_class): + raise TypeError( + "The element classes for the new state must match the initial " + "state. Expected %s; got %s." % (state_classes, + wrapped_func.output_classes)) + + # Extract and validate type information from the returned values. + output_types = wrapped_func.output_types + for new_state_type, state_type in zip( + nest.flatten(output_types), nest.flatten(state_types)): + if new_state_type != state_type: + raise TypeError( + "The element types for the new state must match the initial " + "state. Expected %s; got %s." % (state_types, + wrapped_func.output_types)) + + # Extract shape information from the returned values. 
+ output_shapes = wrapped_func.output_shapes + flat_state_shapes = nest.flatten(state_shapes) + flat_new_state_shapes = nest.flatten(output_shapes) + weakened_state_shapes = [ + original.most_specific_compatible_shape(new) + for original, new in zip(flat_state_shapes, flat_new_state_shapes) + ] + + need_to_rerun = False + for original_shape, weakened_shape in zip(flat_state_shapes, + weakened_state_shapes): + if original_shape.ndims is not None and ( + weakened_shape.ndims is None or + original_shape.as_list() != weakened_shape.as_list()): + need_to_rerun = True + break + + if need_to_rerun: + state_shapes = nest.pack_sequence_as(state_shapes, + weakened_state_shapes) + + reduce_func = wrapped_func.function + reduce_func.add_to_graph(ops.get_default_graph()) + + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as( + output_types, + gen_dataset_ops.reduce_dataset( + self._as_variant_tensor(), # pylint: disable=protected-access + nest.flatten(sparse.serialize_sparse_tensors(initial_state)), + reduce_func.captured_inputs, + f=reduce_func, + output_shapes=nest.flatten( + sparse.as_dense_shapes(output_shapes, output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(output_types, output_classes)))), + output_types, + output_shapes, + output_classes) + class DatasetSource(Dataset): """Abstract class representing a dataset with no inputs.""" diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt index c3ba2dba57..825afb622f 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-dataset.pbtxt @@ -90,6 +90,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: 
"args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt index 3541671bee..cdad5f6360 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt index b113c18ee0..df41bff1b5 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-t-f-record-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt index 7210bf5db4..028bcc2ce9 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.data.-text-line-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], 
varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt index c3ba2dba57..825afb622f 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-dataset.pbtxt @@ -90,6 +90,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt index 3541671bee..cdad5f6360 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt index b113c18ee0..df41bff1b5 100644 --- 
a/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-t-f-record-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " diff --git a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt index 7210bf5db4..028bcc2ce9 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.data.-text-line-dataset.pbtxt @@ -91,6 +91,10 @@ tf_class { name: "range" argspec: "args=[], varargs=args, keywords=None, defaults=None" } + member_method { + name: "reduce" + argspec: "args=[\'self\', \'initial_state\', \'reduce_func\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "repeat" argspec: "args=[\'self\', \'count\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From d8a370274d6ab8c68edcce66849b4e96aed2fa0d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 16:10:08 -0700 Subject: [PATCH 107/570] Optimize ParseNodeNameAsStringPiece and related functions, since they are the most costly functions in Grappler. 
PiperOrigin-RevId: 214853009 --- .../core/grappler/optimizers/data/BUILD | 1 + .../optimizers/data/function_utils.cc | 1 + tensorflow/core/grappler/utils.cc | 39 ------- tensorflow/core/grappler/utils.h | 110 +++++++++++++----- tensorflow/core/grappler/utils_test.cc | 19 +++ 5 files changed, 102 insertions(+), 68 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/data/BUILD b/tensorflow/core/grappler/optimizers/data/BUILD index d198a2a591..81c1bddf67 100644 --- a/tensorflow/core/grappler/optimizers/data/BUILD +++ b/tensorflow/core/grappler/optimizers/data/BUILD @@ -94,6 +94,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core/grappler:mutable_graph_view", "//tensorflow/core/grappler:utils", + "//tensorflow/core:lib_internal", ] + tf_protos_all(), ) diff --git a/tensorflow/core/grappler/optimizers/data/function_utils.cc b/tensorflow/core/grappler/optimizers/data/function_utils.cc index e3f6d8e1ea..311df15bc2 100644 --- a/tensorflow/core/grappler/optimizers/data/function_utils.cc +++ b/tensorflow/core/grappler/optimizers/data/function_utils.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/core/framework/device_base.h" #include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/util/ptr_util.h" namespace tensorflow { diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index db6e4e6852..5867d01324 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -156,45 +156,6 @@ bool IsControlInput(const string& name) { return !name.empty() && name[0] == '^'; } -string NodeName(const string& name) { - int position; - return ParseNodeName(name, &position); -} - -int NodePosition(const string& name) { - int position; - ParseNodeNameAsStringPiece(name, &position); - return position; -} - -int NodePositionIfSameNode(const string& input_name, const string& node_name) { - const bool is_ctrl = input_name[0] == '^'; - auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin(); - auto node_it = node_name.begin(); - if (node_name.empty() || - std::distance(input_it, input_name.end()) < node_name.size()) { - return -2; - } - while (node_it != node_name.end()) { - if (*input_it++ != *node_it++) { - return -2; - } - } - if (input_it == input_name.end()) { - return is_ctrl ? -1 : 0; - } else if (*input_it++ == ':') { - StringPiece remaining(&(*input_it), - std::distance(input_it, input_name.end())); - int position; - if (!strings::safe_strto32(remaining, &position)) { - return -2; - } - return is_ctrl ? -1 : position; - } else { - return -2; - } -} - string AddPrefixToNodeName(const string& name, const string& prefix, const string& delimiter) { if (!name.empty()) { diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index 296ee1678e..95126d470c 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -29,7 +29,6 @@ limitations under the License. 
#include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" -#include "tensorflow/core/lib/strings/scanner.h" namespace tensorflow { namespace grappler { @@ -102,40 +101,92 @@ bool IsControlInput(const string& name); // True iff 'name1' and 'name2' refer to the same input. bool IsSameInput(const string& name1, const string& name2); +// Returns the trailing position number (or zero if no number is present) if +// NodeName(input_name) is equal to node_name. Returns -1 for control inputs. +// Returns -2 if NodeName(input_name) is not equal to node_name. +// Note: This function is used very heavily, and this hand-optimized +// version is 3-4x faster than the version using Scanner, which it replaced. +// This is worth the reduction in readability. +inline int NodePositionIfSameNode(const string& input_name, + const string& node_name) { + if (input_name.empty()) return -2; + const bool is_ctrl = input_name[0] == '^'; + auto input_it = is_ctrl ? input_name.begin() + 1 : input_name.begin(); + auto node_it = node_name.begin(); + if (node_name.empty() || + std::distance(input_it, input_name.end()) < node_name.size()) { + return -2; + } + while (node_it != node_name.end()) { + if (*input_it++ != *node_it++) { + return -2; + } + } + if (input_it == input_name.end()) { + return is_ctrl ? -1 : 0; + } else if (*input_it++ == ':') { + StringPiece remaining(&(*input_it), + std::distance(input_it, input_name.end())); + int position; + if (!strings::safe_strto32(remaining, &position)) { + return -2; + } + return is_ctrl ? -1 : position; + } else { + return -2; + } +} + // Return the node name corresponding to 'name' if name is valid, or the empty // string otherwise. -string NodeName(const string& name); +inline StringPiece NodeNameAsStringPiece(const string& name) { + static const string empty; + if (name.empty()) return StringPiece(empty); + const auto begin_it = name[0] == '^' ? 
name.begin() + 1 : name.begin(); + auto end_it = begin_it; + while (end_it != name.end() && *end_it != ':') { + ++end_it; + } + if (end_it != name.end() && *end_it != ':') { + return StringPiece(empty); + } + return StringPiece(&(*begin_it), std::distance(begin_it, end_it)); +} -// Get the trailing position number ":{digits}" (if any) of a node name. -// Returns -1 for control inputs. -int NodePosition(const string& name); +// Return the node name corresponding to 'name' if name is valid, or the empty +// string otherwise. +inline string NodeName(const string& name) { + return string(NodeNameAsStringPiece(name)); +} +// Returns the node name and position in a single call. inline StringPiece ParseNodeNameAsStringPiece(const string& name, int* position) { - // Strip the prefix '^' (if any), and strip the trailing ":{digits} (if any) - // to get a node name. - strings::Scanner scan(name); - scan.ZeroOrOneLiteral("^") - .RestartCapture() - .One(strings::Scanner::LETTER_DIGIT_DOT_UNDERSCORE) - .Any(strings::Scanner::LETTER_DIGIT_DASH_DOT_SLASH_UNDERSCORE); - StringPiece capture; - StringPiece remaining; - if (scan.Peek(':') != ':' || !scan.GetResult(&remaining, &capture)) { + static const string empty; + if (name.empty()) { *position = 0; - static const string empty; return StringPiece(empty); - } else { - if (name[0] == '^') { - *position = -1; - } else if (remaining.empty()) { - *position = 0; - } else { - // Skip the first ':' character. - CHECK(strings::safe_strto32(remaining.substr(1), position)); + } + const bool is_ctrl = name[0] == '^'; + const auto begin_it = is_ctrl ? name.begin() + 1 : name.begin(); + *position = is_ctrl ? 
-1 : 0; + auto end_it = begin_it; + while (end_it != name.end() && *end_it != ':') { + ++end_it; + } + const StringPiece node_name(&(*begin_it), std::distance(begin_it, end_it)); + if (end_it != name.end()) { + if (*end_it != ':') { + return StringPiece(empty); + } else if (!is_ctrl) { + ++end_it; + StringPiece remaining(&(*end_it), std::distance(end_it, name.end())); + if (!strings::safe_strto32(remaining, position)) { + return StringPiece(empty); + } } - return capture; } + return node_name; } // Returns the node name and position in a single call. @@ -143,10 +194,11 @@ inline string ParseNodeName(const string& name, int* position) { return string(ParseNodeNameAsStringPiece(name, position)); } -// Returns NodePosition(input_name) if NodeName(input_name) == node_name. -// Otherwise returns -2; -// REQUIRES: inputs_name.size() > 0 && node_name.size() > 0. -int NodePositionIfSameNode(const string& input_name, const string& node_name); +inline int NodePosition(const string& name) { + int position; + ParseNodeNameAsStringPiece(name, &position); + return position; +} // Add a prefix to a node name with a custom delimiter. 
string AddPrefixToNodeName(const string& name, const string& prefix, diff --git a/tensorflow/core/grappler/utils_test.cc b/tensorflow/core/grappler/utils_test.cc index 6b787a6910..9b6c1f690b 100644 --- a/tensorflow/core/grappler/utils_test.cc +++ b/tensorflow/core/grappler/utils_test.cc @@ -371,6 +371,25 @@ BM_NodePositionIfSameNode("^foo/bar/baz", "foo/bar/baz", Match_Ctrl); BM_NodePositionIfSameNode("blah", "foo/bar/baz", NoMatch_0); BM_NodePositionIfSameNode("foo/bar/baz/gnu", "foo/bar/baz", NoMatch_end); +#define BM_ParseNodeNameAsStringPiece(I, NAME) \ + static void BM_ParseNodeNameAsStringPiece_##NAME(int iters) { \ + string input = I; \ + for (int i = 0; i < iters; ++i) { \ + int position; \ + const StringPiece name = ParseNodeNameAsStringPiece(input, &position); \ + CHECK_GE(position, -1); \ + CHECK(!name.empty()); \ + } \ + } \ + BENCHMARK(BM_ParseNodeNameAsStringPiece_##NAME) + +BM_ParseNodeNameAsStringPiece("foo", foo); +BM_ParseNodeNameAsStringPiece("foo/bar/baz", foo_bar_baz); +BM_ParseNodeNameAsStringPiece("^foo/bar/baz", foo_bar_baz_ctrl); +BM_ParseNodeNameAsStringPiece("foo:123", foo123); +BM_ParseNodeNameAsStringPiece("foo/bar/baz:123", foo_bar_baz_123); +BM_ParseNodeNameAsStringPiece("^foo/bar/baz:123", foo_bar_baz_123_ctrl); + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From f41573b7956871b4142c97eb85ddf163ad641976 Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Thu, 27 Sep 2018 16:16:20 -0700 Subject: [PATCH 108/570] Automated rollback of commit 750466c6e6624d279de7f9a43accd682d487509c PiperOrigin-RevId: 214853846 --- tensorflow/core/BUILD | 16 -- .../core/common_runtime/direct_session.cc | 49 +--- .../core/common_runtime/direct_session.h | 3 - .../common_runtime/direct_session_test.cc | 28 -- tensorflow/core/framework/run_handler.cc | 248 ------------------ tensorflow/core/framework/run_handler.h | 95 ------- tensorflow/core/framework/run_handler_util.cc | 57 ---- tensorflow/core/framework/run_handler_util.h | 
43 --- .../core/framework/run_handler_util_test.cc | 93 ------- tensorflow/core/protobuf/config.proto | 5 - ...ensorflow.-run-options.-experimental.pbtxt | 6 - .../golden/v1/tensorflow.-run-options.pbtxt | 6 - ...ensorflow.-run-options.-experimental.pbtxt | 6 - .../golden/v2/tensorflow.-run-options.pbtxt | 6 - 14 files changed, 6 insertions(+), 655 deletions(-) delete mode 100644 tensorflow/core/framework/run_handler.cc delete mode 100644 tensorflow/core/framework/run_handler.h delete mode 100644 tensorflow/core/framework/run_handler_util.cc delete mode 100644 tensorflow/core/framework/run_handler_util.h delete mode 100644 tensorflow/core/framework/run_handler_util_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 01e2e9f62b..ca247dc56b 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2484,8 +2484,6 @@ FRAMEWORK_INTERNAL_PUBLIC_HEADERS = [ "framework/op_segment.h", "framework/rendezvous.h", # only needed for tests "framework/resource_var.h", - "framework/run_handler.h", - "framework/run_handler_util.h", "framework/tensor_reference.h", "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", @@ -2972,7 +2970,6 @@ tf_cuda_library( ":core_cpu_internal", ":device_tracer", ":framework", - ":framework_internal", ":graph", ":lib", ":lib_internal", @@ -4120,19 +4117,6 @@ tf_cc_test( ], ) -tf_cc_test( - name = "framework_run_handler_util_test", - size = "small", - srcs = ["framework/run_handler_util_test.cc"], - linkstatic = tf_kernel_tests_linkstatic(), - deps = [ - ":framework_internal", - ":lib", - ":test", - ":test_main", - ], -) - tf_cuda_cc_test( name = "common_runtime_direct_session_test", size = "small", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 458e133b68..841181f8c3 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -40,7 +40,6 @@ 
limitations under the License. #include "tensorflow/core/framework/graph_def_util.h" #include "tensorflow/core/framework/log_memory.h" #include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/framework/run_handler.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/graph/algorithm.h" @@ -245,21 +244,6 @@ void DirectSession::SchedClosure(thread::ThreadPool* pool, #endif // __ANDROID__ } -static RunHandlerPool* GetOrCreateRunHandlerPool( - const SessionOptions& options) { - static RunHandlerPool* pool = - new RunHandlerPool(NumInterOpThreadsFromSessionOptions(options)); - return pool; -} - -bool DirectSession::ShouldUseRunHandlerPool() const { - if (options_.config.session_inter_op_thread_pool_size() > 0 || - options_.config.use_per_session_threads()) { - return false; - } - return true; -} - DirectSession::DirectSession(const SessionOptions& options, const DeviceMgr* device_mgr, DirectSessionFactory* const factory) @@ -598,37 +582,16 @@ Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, } } - std::unique_ptr handler; - if (ShouldUseRunHandlerPool() && - run_options.experimental().use_run_handler_pool()) { - // Non-null only when a global inter-op pool is used. 
- VLOG(1) << "Using RunHandler to scheduler inter-op closures."; - handler = GetOrCreateRunHandlerPool(options_)->Get(); - } - auto* handler_ptr = handler.get(); - - Executor::Args::Runner default_runner = nullptr; - - if (pool == nullptr) { - default_runner = [](Executor::Args::Closure c) { c(); }; - } else if (handler_ptr != nullptr) { - default_runner = [handler_ptr](Executor::Args::Closure c) { - handler_ptr->ScheduleInterOpClosure(std::move(c)); - }; - } else { - default_runner = [this, pool](Executor::Args::Closure c) { - SchedClosure(pool, std::move(c)); - }; - } - + Executor::Args::Runner default_runner = [this, + pool](Executor::Args::Closure c) { + SchedClosure(pool, std::move(c)); + }; for (const auto& item : executors_and_keys->items) { - // TODO(azaks): support partial run. - // TODO(azaks): if the device picks its own threadpool, we need to assign + // TODO(zhengxq): support partial run. + // TODO(zhengxq): if the device picks its own threadpool, we need to assign // less threads to the main compute pool by default. thread::ThreadPool* device_thread_pool = item.device->tensorflow_device_thread_pool(); - // TODO(crk): Investigate usage of RunHandlerPool when using device specific - // thread pool(s). if (!device_thread_pool) { args.runner = default_runner; } else { diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 3a168bbe3f..4a6a921ea7 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -247,9 +247,6 @@ class DirectSession : public Session { ExecutorsAndKeys* executors_and_keys, RunMetadata* run_metadata); - // Returns whether inter-op execution uses a global pool. 
- bool ShouldUseRunHandlerPool() const; - ::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_state_lock_); diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index e3e431f800..65e816c202 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -625,34 +625,6 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) { EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2); } -TEST_F(DirectSessionMinusAXTest, UseRunHandlerPool) { - Initialize({3, 2, -1, 0}); - auto session = CreateSession(); - ASSERT_TRUE(session != nullptr); - TF_ASSERT_OK(session->Create(def_)); - std::vector> inputs; - - // Request two targets: one fetch output and one non-fetched output. - std::vector output_names = {y_ + ":0"}; - std::vector target_nodes = {y_neg_}; - std::vector outputs; - - // Prepares RunOptions and RunMetadata - RunOptions run_options; - run_options.mutable_experimental()->set_use_run_handler_pool(true); - - Status s = session->Run(run_options, inputs, output_names, target_nodes, - &outputs, nullptr); - TF_ASSERT_OK(s); - - ASSERT_EQ(1, outputs.size()); - // The first output should be initialized and have the correct - // output. - auto mat = outputs[0].matrix(); - ASSERT_TRUE(outputs[0].IsInitialized()); - EXPECT_FLOAT_EQ(5.0, mat(0, 0)); -} - TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) { GraphDef def; Graph g(OpRegistry::Global()); diff --git a/tensorflow/core/framework/run_handler.cc b/tensorflow/core/framework/run_handler.cc deleted file mode 100644 index 9c6490a603..0000000000 --- a/tensorflow/core/framework/run_handler.cc +++ /dev/null @@ -1,248 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include "tensorflow/core/framework/run_handler.h" - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/framework/run_handler_util.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/util/ptr_util.h" - -namespace tensorflow { - -// Contains the concrete implementation of the RunHandler. -// Externally visible RunHandler class simply forwards the work to this one. -class RunHandler::Impl { - public: - explicit Impl(RunHandlerPool::Impl* pool_impl) : pool_impl_(pool_impl) { - Reset(); - } - - ~Impl() {} - - void set_inter_op_scheduling_range(std::uint_fast32_t start, - std::uint_fast32_t limit) { - inter_op_scheduling_range_.store(EncodePartition(start, limit), - std::memory_order_release); - } - - std::uint_fast32_t inter_op_scheduling_range() const { - return inter_op_scheduling_range_.load(std::memory_order_acquire); - } - - // Stores now time (in microseconds) since unix epoch when the handler is - // requested via RunHandlerPool::Get(). - uint64 start_time_us() const { return start_time_us_; } - - void ScheduleInterOpClosure(std::function fn); - - void Reset(); - - RunHandlerPool::Impl* pool_impl() { return pool_impl_; } - - private: - // Encoding/decoding logic for storing [start, limit) into a single - // uint_fast32_t int. We assume that pool_num_threads < (1 << 16). 
- const int kMaxPartitionBits = 16; - const int kMaxThreads = 1 << kMaxPartitionBits; - - std::uint_fast32_t EncodePartition(std::uint_fast32_t start, - std::uint_fast32_t limit) { - return (start << kMaxPartitionBits) | limit; - } - - void DecodePartition(std::uint_fast32_t val, std::uint_fast32_t* start, - std::uint_fast32_t* limit) { - *limit = val & (kMaxThreads - 1); - val >>= kMaxPartitionBits; - *start = val; - } - - std::atomic_uint_fast32_t inter_op_scheduling_range_; - RunHandlerPool::Impl* pool_impl_; // NOT OWNED. - uint64 start_time_us_; -}; - -// Contains shared state across all run handlers present in the pool. Also -// responsible for pool management decisions. -// This class is thread safe. -class RunHandlerPool::Impl { - public: - // Maximum number of handlers pre-created during pool construction time. The - // number has been chosen expecting each handler might at least want 1 - // inter-op thread for execution (during compute intensive workloads like - // inference). - static const int kMaxHandlers = 128; - - explicit Impl(int num_inter_op_threads) - : inter_op_thread_pool_(new thread::ThreadPool( - Env::Default(), ThreadOptions(), "inter_op", num_inter_op_threads)), - iterations_(0) { - VLOG(1) << "Creating a RunHandlerPool with max handlers: " << kMaxHandlers; - for (int i = 0; i < kMaxHandlers; ++i) { - handlers_.emplace_back(new RunHandler::Impl(this)); - free_handlers_.push_back(handlers_.back().get()); - } - } - - ~Impl() { - // Sanity check that all handlers have been returned back to the pool before - // destruction. 
- DCHECK_EQ(handlers_.size(), kMaxHandlers); - DCHECK_EQ(free_handlers_.size(), handlers_.size()); - DCHECK_EQ(sorted_active_handlers_.size(), 0); - } - - thread::ThreadPool* inter_op_thread_pool() const { - return inter_op_thread_pool_.get(); - } - - std::unique_ptr Get() LOCKS_EXCLUDED(mu_) { - mutex_lock l(mu_); - while (free_handlers_.empty()) { - one_handler_free_.wait(l); - } - // Remove the last entry from free_handlers_ and add to the end of - // sorted_active_handlers_. - auto* handler_impl = free_handlers_.back(); - handler_impl->Reset(); - // Sortedness isn't violated if we simply add at the end of the list, since - // handlers are expected to be obtained in increasing order of time. - sorted_active_handlers_.push_back(handler_impl); - DCHECK_LE(sorted_active_handlers_.size(), kMaxHandlers); - free_handlers_.pop_back(); - - RecomputePoolStatsLocked(); - return WrapUnique(new RunHandler(handler_impl)); - } - - void ReleaseHandler(RunHandler::Impl* handler) LOCKS_EXCLUDED(mu_) { - { - mutex_lock l(mu_); - DCHECK_GT(sorted_active_handlers_.size(), 0); - - uint64 now = tensorflow::Env::Default()->NowMicros(); - double elapsed = (now - handler->start_time_us()) / 1000.0; - time_hist_.Add(elapsed); - - // Erase from and update sorted_active_handlers_. Add it to the end of - // free_handlers_. - auto iter = std::find(sorted_active_handlers_.begin(), - sorted_active_handlers_.end(), handler); - DCHECK(iter != sorted_active_handlers_.end()) - << "Unexpected handler: " << handler - << " is being requested for release"; - - // Remove this handler from this list and add it to the list of free - // handlers. - sorted_active_handlers_.erase(iter); - free_handlers_.push_back(handler); - DCHECK_LE(free_handlers_.size(), kMaxHandlers); - - RecomputePoolStatsLocked(); - } - one_handler_free_.notify_one(); - } - - private: - void RecomputePoolStatsLocked() EXCLUSIVE_LOCKS_REQUIRED(mu_); - - // Thread safe part. 
- const std::unique_ptr inter_op_thread_pool_; - - // Thread compatible part used only by lock under RunHandlerPool. - // Handlers are sorted by start time. - std::vector sorted_active_handlers_ GUARDED_BY(mu_); - std::vector free_handlers_ GUARDED_BY(mu_); - std::vector> handlers_ GUARDED_BY(mu_); - // Histogram of elapsed runtime of every handler (in ms). - histogram::Histogram time_hist_ GUARDED_BY(mu_); - std::vector inter_op_start_ GUARDED_BY(mu_); - std::vector inter_op_limit_ GUARDED_BY(mu_); - int64 iterations_ GUARDED_BY(mu_); - condition_variable one_handler_free_; - mutex mu_; -}; - -void RunHandlerPool::Impl::RecomputePoolStatsLocked() { - int num_active_requests = sorted_active_handlers_.size(); - if (num_active_requests == 0) return; - - int num_threads = inter_op_thread_pool_->NumThreads(); - - inter_op_start_.resize(num_active_requests); - inter_op_limit_.resize(num_active_requests); - - const int kMinThreadsPerRequest = 3; - ComputeInterOpSchedulingRanges(num_active_requests, num_threads, - kMinThreadsPerRequest, &inter_op_start_, - &inter_op_limit_); - - for (int i = 0; i < num_active_requests; ++i) { - sorted_active_handlers_[i]->set_inter_op_scheduling_range( - inter_op_start_[i], inter_op_limit_[i]); - } - - if (iterations_++ % 5000 == 0 && VLOG_IS_ON(1)) { - VLOG(1) << "Printing time histogram: " << time_hist_.ToString(); - VLOG(1) << "Active session runs: " << num_active_requests; - uint64 now = tensorflow::Env::Default()->NowMicros(); - string ranges_str = ""; - string times_str = ""; - for (int i = 0; i < num_active_requests; ++i) { - if (i > 0) { - times_str += " "; - ranges_str += " "; - } - - times_str += strings::StrCat( - (now - sorted_active_handlers_[i]->start_time_us()) / 1000.0, " ms."); - ranges_str += strings::StrCat("[", inter_op_start_[i], ", ", - inter_op_limit_[i], ")"); - } - VLOG(1) << "Elapsed times are: " << times_str; - VLOG(1) << "Ranges are: " << ranges_str; - } -} - -void 
RunHandler::Impl::ScheduleInterOpClosure(std::function fn) { - std::uint_fast32_t start = 0, limit = 0; - DecodePartition(inter_op_scheduling_range(), &start, &limit); - pool_impl_->inter_op_thread_pool()->Schedule(std::move(fn)); -} - -void RunHandler::Impl::Reset() { - set_inter_op_scheduling_range( - 0, pool_impl_->inter_op_thread_pool()->NumThreads()); - start_time_us_ = tensorflow::Env::Default()->NowMicros(); -} - -RunHandlerPool::RunHandlerPool(int num_inter_op_threads) - : impl_(new Impl(num_inter_op_threads)) {} - -RunHandlerPool::~RunHandlerPool() {} - -std::unique_ptr RunHandlerPool::Get() { return impl_->Get(); } - -RunHandler::RunHandler(Impl* impl) : impl_(impl) {} - -void RunHandler::ScheduleInterOpClosure(std::function fn) { - impl_->ScheduleInterOpClosure(std::move(fn)); -} - -RunHandler::~RunHandler() { impl_->pool_impl()->ReleaseHandler(impl_); } -} // namespace tensorflow diff --git a/tensorflow/core/framework/run_handler.h b/tensorflow/core/framework/run_handler.h deleted file mode 100644 index 72fa6301b4..0000000000 --- a/tensorflow/core/framework/run_handler.h +++ /dev/null @@ -1,95 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ -#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ - -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/histogram/histogram.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/protobuf/config.pb.h" - -namespace tensorflow { - -class RunHandler; - -// RunHandlerPool is a fixed size pool of pre-allocated RunHandlers -// that can be used for tracking inter-op work for a given Session::Run(). -// RunHandler(s) in the pool are initially 'inactive'. A RunHandler becomes -// 'active' when its unique_ptr is returned by Get() and is being used by a -// client. It becomes 'inactive' once more when its unique_ptr gets destroyed. -// -// Expected usage: -// -// * Create a single RunHandlerPool (say run_handler_pool_). -// -// * When a Session::Run() is invoked, obtain a handler by: -// auto handler = run_handler_pool_->Get(); -// -// * Use handler for scheduling all inter-op work by: -// handler->ScheduleInterOpClosure(closure); -// -// This class is thread safe. -class RunHandlerPool { - public: - explicit RunHandlerPool(int num_inter_op_threads); - ~RunHandlerPool(); - - // Returns an inactive RunHandler from the pool. - // - // RunHandlers in RunHandlerPool are initially 'inactive'. - // A RunHandler becomes 'active' when its unique_ptr its returned by Get() - // and is being used by a client. It becomes 'inactive' once more when the - // unique_ptr is destroyed. - // - // Will block unless there is an inactive handler. - std::unique_ptr Get(); - - private: - class Impl; - friend class RunHandler; - - std::unique_ptr impl_; -}; - -// RunHandler can be used to schedule inter-op closures to run on a global pool -// shared across all Session::Run(s). -// -// It can only be created via RunHandlerPool::Get(). 
-// -// This class can be used instead of directly scheduling closures on a global -// pool since it maintains a global view across all sessions and optimizes pool -// scheduling to improve (median and tail) latency. -// -// This class is thread safe. -class RunHandler { - public: - void ScheduleInterOpClosure(std::function fn); - - ~RunHandler(); - - private: - class Impl; - friend class RunHandlerPool::Impl; - - explicit RunHandler(Impl* impl); - - Impl* impl_; // NOT OWNED. -}; - -} // end namespace tensorflow. - -#endif // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_H_ diff --git a/tensorflow/core/framework/run_handler_util.cc b/tensorflow/core/framework/run_handler_util.cc deleted file mode 100644 index 3087998c69..0000000000 --- a/tensorflow/core/framework/run_handler_util.cc +++ /dev/null @@ -1,57 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/run_handler_util.h" - -#include -#include -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { - -void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads, - int min_threads_per_request, - std::vector* start_vec, - std::vector* end_vec) { - // Each request is expected to have weight W[i] = num_active_requests - i. - // Therefore, total_weight = sum of all request weights. 
- float total_weight = 0.5f * num_active_requests * (num_active_requests + 1); - float demand_factor = static_cast(num_threads) / total_weight; - float last_cumulative_weight = 0.0; - min_threads_per_request = std::max(1, min_threads_per_request); - for (int i = 0; i != num_active_requests; i++) { - float cumulative_weight = - static_cast(i + 1) * - (num_active_requests - static_cast(i) * 0.5f); - float weight = cumulative_weight - last_cumulative_weight; - // Quantize thread_demand by rounding up, and also satisfying - // `min_threads_per_request` constraint. - // Note: We subtract a small epsilon (0.00001) to prevent ceil(..) from - // rounding weights like 4.0 to 5. - int demand = - std::max(min_threads_per_request, - static_cast(ceil(weight * demand_factor - 0.00001f))); - // For the quantized range [start, end); compute the floor of real start, - // and expand downwards from there with length `demand` and adjust for - // boundary conditions. - int start = last_cumulative_weight * demand_factor; - int end = std::min(num_threads, start + demand); - start = std::max(0, std::min(start, end - demand)); - start_vec->at(i) = start; - end_vec->at(i) = end; - last_cumulative_weight = cumulative_weight; - } -} -} // namespace tensorflow diff --git a/tensorflow/core/framework/run_handler_util.h b/tensorflow/core/framework/run_handler_util.h deleted file mode 100644 index c0c36aeccb..0000000000 --- a/tensorflow/core/framework/run_handler_util.h +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ -#define TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ - -#include -#include - -namespace tensorflow { - -// Assign thread ranges to requests. -// Requests are numbered 0...num_active_requests-1, and -// threads are numbered 0...num_threads-1. -// On return, the range start_vec->at(i)...end_vec->at(i)-1 -// indicates the subrange of the threads available to request i. -// The ranges given to different requests may overlap. -// Lower numbered requests will tend to be assigned more threads. -// Thus, a client might associate older requests with lower -// array indices so they receive access to more threads. -// However, the routine ensures that each request is given access -// to at least min(min_threads_per_request, num_threads) threads. -// Every thread will be assigned to at least one request range, -// assuming there is at least one request. -void ComputeInterOpSchedulingRanges(int num_active_requests, int num_threads, - int min_threads_per_request, - std::vector* start_vec, - std::vector* end_vec); - -} // end namespace tensorflow -#endif // TENSORFLOW_CORE_FRAMEWORK_RUN_HANDLER_UTIL_H_ diff --git a/tensorflow/core/framework/run_handler_util_test.cc b/tensorflow/core/framework/run_handler_util_test.cc deleted file mode 100644 index a1928c132b..0000000000 --- a/tensorflow/core/framework/run_handler_util_test.cc +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/run_handler_util.h" - -#include -#include "tensorflow/core/lib/strings/strcat.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/test.h" -namespace tensorflow { -namespace { - -void VerifyFunction(int num_active_requests, int num_threads, - int min_threads_per_request, bool print_stats = false) { - if (print_stats) { - LOG(INFO) << "Test case# num_active_requests: " << num_active_requests - << " num_threads: " << num_threads - << " min_threads: " << min_threads_per_request; - } - std::vector start(num_active_requests); - std::vector end(num_active_requests); - - ComputeInterOpSchedulingRanges(num_active_requests, num_threads, - min_threads_per_request, &start, &end); - string range_str = ""; - for (int i = 0; i < num_active_requests; ++i) { - if (i > 0) range_str += " "; - range_str += strings::StrCat("[", start[i], ", ", end[i], ")"); - - ASSERT_GE(start[i], 0) << range_str; - ASSERT_LE(end[i], num_threads) << range_str; - if (i > 0) { - // Due to linearly decreasing demand, #threads(i - 1) >= #threads(i) - ASSERT_GE(end[i - 1] - start[i - 1], end[i] - start[i]) << range_str; - // No missing threads. - ASSERT_GE(end[i - 1], start[i]) << range_str; - } - // Each interval is at least of size 'min_threads_per_request'. 
- ASSERT_GE((end[i] - start[i]), min_threads_per_request) << range_str; - // Verify that assigned (quantized) threads is not overly estimated - // from real demand, when the demand is high (>= - // min_threads_per_request). - float entry_weight = num_active_requests - i; - float total_weight = 0.5f * num_active_requests * (num_active_requests + 1); - float thread_demand = (entry_weight * num_threads) / total_weight; - if (thread_demand > min_threads_per_request) { - // We expect some over-estimation of threads due to quantization, - // but we hope it's not more than 1 extra thread. - ASSERT_NEAR(end[i] - start[i], thread_demand, 1.0) - << "Ranges: " << range_str << " thread_demand: " << thread_demand - << " i: " << i; - } - } - ASSERT_EQ(end[num_active_requests - 1], num_threads); - ASSERT_EQ(start[0], 0); - if (print_stats) { - LOG(INFO) << "Assigned ranges: " << range_str; - } -} - -TEST(RunHandlerUtilTest, TestComputeInterOpSchedulingRanges) { - const int kMinThreadsPerRequestBound = 12; - const int kMaxActiveRequests = 128; - const int kMaxThreads = 128; - - for (int min_threads_per_request = 1; - min_threads_per_request <= kMinThreadsPerRequestBound; - ++min_threads_per_request) { - for (int num_active_requests = 1; num_active_requests <= kMaxActiveRequests; - ++num_active_requests) { - for (int num_threads = min_threads_per_request; - num_threads <= kMaxThreads; ++num_threads) { - VerifyFunction(num_active_requests, num_threads, - min_threads_per_request); - } - } - } -} - -} // namespace -} // namespace tensorflow diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 104ab039cb..85cd02350a 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -453,11 +453,6 @@ message RunOptions { // same group_key value (in a distributed computation where tasks // run disjoint graphs). 
int64 collective_graph_key = 1; - // If true, then operations (using the inter-op pool) across all - // session::run() calls will be centrally scheduled, optimizing for (median - // and tail) latency. - // Consider using this option for CPU-bound workloads like inference. - bool use_run_handler_pool = 2; }; Experimental experimental = 8; diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt index 47b5b56faf..537e73aa89 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.-experimental.pbtxt @@ -8,11 +8,5 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } - field { - name: "use_run_handler_pool" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_BOOL - } } } diff --git a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt index c0c2e7b9f8..cec04a2bf0 100644 --- a/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/v1/tensorflow.-run-options.pbtxt @@ -55,12 +55,6 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } - field { - name: "use_run_handler_pool" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_BOOL - } } enum_type { name: "TraceLevel" diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt index 47b5b56faf..537e73aa89 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.-experimental.pbtxt @@ -8,11 +8,5 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } - field { - name: "use_run_handler_pool" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_BOOL - } } } diff --git a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt 
b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt index c0c2e7b9f8..cec04a2bf0 100644 --- a/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt +++ b/tensorflow/tools/api/golden/v2/tensorflow.-run-options.pbtxt @@ -55,12 +55,6 @@ tf_proto { label: LABEL_OPTIONAL type: TYPE_INT64 } - field { - name: "use_run_handler_pool" - number: 2 - label: LABEL_OPTIONAL - type: TYPE_BOOL - } } enum_type { name: "TraceLevel" -- GitLab From 5f67bf69d3f53d1cd3bb86ebeeb03ea2bba5911b Mon Sep 17 00:00:00 2001 From: Saurabh Saxena Date: Thu, 27 Sep 2018 16:16:26 -0700 Subject: [PATCH 109/570] Support nested variants in CopyHostToDevice and CopyDeviceToHost. PiperOrigin-RevId: 214853860 --- tensorflow/core/common_runtime/copy_tensor.cc | 82 +++++++++++-------- tensorflow/python/kernel_tests/BUILD | 4 +- .../python/kernel_tests/list_ops_test.py | 26 ++++++ 3 files changed, 75 insertions(+), 37 deletions(-) diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc index d800a86199..6e2eb66b94 100644 --- a/tensorflow/core/common_runtime/copy_tensor.cc +++ b/tensorflow/core/common_runtime/copy_tensor.cc @@ -61,26 +61,33 @@ void CopyHostToDevice(const Tensor* input, Allocator* cpu_allocator, status_cb->Unref(); }; auto copier = std::bind( - [dst, recv_dev_context, out_allocator, status_cb]( - StatusCallback wrapped_done_, - // Begin unbound arguments - const Tensor& from, Tensor* to) { - if (!DMAHelper::CanUseDMA(&from)) { - Status err = errors::InvalidArgument( - "During Variant Host->Device Copy: " - "non-DMA-copy attempted of tensor type: ", - DataTypeString(from.dtype())); - status_cb->UpdateStatus(err); - return err; - } - if (status_cb->ok()) { + [dst, recv_dev_context, out_allocator, status_cb, cpu_allocator, + edge_name](StatusCallback wrapped_done_, + // Begin unbound arguments + const Tensor& from, Tensor* to) { + if (from.dtype() == DT_VARIANT) { status_cb->Ref(); - *to = Tensor(out_allocator, from.dtype(), 
from.shape()); - recv_dev_context->CopyCPUTensorToDevice(&from, dst, to, - wrapped_done_); + CopyHostToDevice(&from, cpu_allocator, out_allocator, edge_name, + dst, to, recv_dev_context, wrapped_done_); return Status::OK(); } else { - return status_cb->status(); + if (!DMAHelper::CanUseDMA(&from)) { + Status err = errors::InvalidArgument( + "During Variant Host->Device Copy: " + "non-DMA-copy attempted of tensor type: ", + DataTypeString(from.dtype())); + status_cb->UpdateStatus(err); + return err; + } + if (status_cb->ok()) { + status_cb->Ref(); + *to = Tensor(out_allocator, from.dtype(), from.shape()); + recv_dev_context->CopyCPUTensorToDevice(&from, dst, to, + wrapped_done_); + return Status::OK(); + } else { + return status_cb->status(); + } } }, std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2); @@ -119,26 +126,33 @@ void CopyDeviceToHost(const Tensor* input, Allocator* cpu_allocator, status_cb->Unref(); }; auto copier = std::bind( - [edge_name, src, send_dev_context, out_allocator, status_cb]( - StatusCallback wrapped_done_, - // Begin unbound arguments - const Tensor& from, Tensor* to) { - if (!DMAHelper::CanUseDMA(&from)) { - Status err = errors::InvalidArgument( - "During Variant Device->Host Copy: " - "non-DMA-copy attempted of tensor type: ", - DataTypeString(from.dtype())); - status_cb->UpdateStatus(err); - return err; - } - if (status_cb->ok()) { + [edge_name, src, send_dev_context, out_allocator, status_cb, + cpu_allocator](StatusCallback wrapped_done_, + // Begin unbound arguments + const Tensor& from, Tensor* to) { + if (from.dtype() == DT_VARIANT) { status_cb->Ref(); - *to = Tensor(out_allocator, from.dtype(), from.shape()); - send_dev_context->CopyDeviceTensorToCPU(&from, edge_name, src, to, - wrapped_done_); + CopyDeviceToHost(&from, cpu_allocator, out_allocator, edge_name, + src, to, send_dev_context, wrapped_done_); return Status::OK(); } else { - return status_cb->status(); + if (!DMAHelper::CanUseDMA(&from)) { + Status 
err = errors::InvalidArgument( + "During Variant Device->Host Copy: " + "non-DMA-copy attempted of tensor type: ", + DataTypeString(from.dtype())); + status_cb->UpdateStatus(err); + return err; + } + if (status_cb->ok()) { + status_cb->Ref(); + *to = Tensor(out_allocator, from.dtype(), from.shape()); + send_dev_context->CopyDeviceTensorToCPU(&from, edge_name, src, to, + wrapped_done_); + return Status::OK(); + } else { + return status_cb->status(); + } } }, std::move(wrapped_done), std::placeholders::_1, std::placeholders::_2); diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index c2e36e5e19..280c18ec00 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -3257,8 +3257,7 @@ tf_py_test( tags = ["no_gpu"], # TODO(b/111656070) ) -# TODO(b/116053459): Replace with cuda_py_test. -tf_py_test( +cuda_py_test( name = "while_v2_test", size = "medium", srcs = ["while_v2_test.py"], @@ -3278,5 +3277,4 @@ tf_py_test( "//tensorflow/python:while_v2", ], grpc_enabled = True, - tags = ["no_gpu"], # TODO(b/116053459) ) diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 0f5607712b..ae413edaec 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -170,6 +170,32 @@ class ListOpsTest(test_util.TensorFlowTestCase): list_ops.tensor_list_pop_back( l_cpu, element_dtype=dtypes.float32)[1]), 2.0) + @test_util.run_in_graph_and_eager_modes + def testCPUGPUCopyNested(self): + if not context.num_gpus(): + return + t = constant_op.constant([1.0, 2.0]) + child_l = list_ops.tensor_list_from_tensor(t, element_shape=scalar_shape()) + l = list_ops.empty_tensor_list( + element_shape=constant_op.constant([], dtype=dtypes.int32), + element_dtype=dtypes.variant) + l = list_ops.tensor_list_push_back(l, child_l) + with context.device("gpu:0"): + l_gpu = array_ops.identity(l) + _, child_l_gpu = 
list_ops.tensor_list_pop_back( + l_gpu, element_dtype=dtypes.variant) + self.assertAllEqual( + self.evaluate( + list_ops.tensor_list_pop_back( + child_l_gpu, element_dtype=dtypes.float32)[1]), 2.0) + l_cpu = array_ops.identity(l_gpu) + _, child_l_cpu = list_ops.tensor_list_pop_back( + l_cpu, element_dtype=dtypes.variant) + self.assertAllEqual( + self.evaluate( + list_ops.tensor_list_pop_back( + child_l_cpu, element_dtype=dtypes.float32)[1]), 2.0) + def testGraphStack(self): with self.cached_session(): tl = list_ops.empty_tensor_list( -- GitLab From 2330933ddd0b29ad206e351c9120e621cdaf6312 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 27 Sep 2018 16:19:09 -0700 Subject: [PATCH 110/570] Rename TFLite Extended -> TFLite Flex PiperOrigin-RevId: 214854303 --- tensorflow/contrib/lite/build_def.bzl | 4 ++-- tensorflow/contrib/lite/model.cc | 4 ++-- tensorflow/contrib/lite/python/convert.py | 8 ++++---- tensorflow/contrib/lite/python/lite_test.py | 4 ++-- tensorflow/contrib/lite/testing/generate_examples.py | 10 +++++----- .../testing/model_coverage/model_coverage_lib_test.py | 2 +- tensorflow/contrib/lite/tools/benchmark/BUILD | 4 ++-- .../lite/tools/benchmark/benchmark_tflite_model.cc | 8 ++++---- .../lite/tools/benchmark/benchmark_tflite_model.h | 8 ++++---- 9 files changed, 26 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 7f5c6bdc2f..7ef26de69f 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -301,7 +301,7 @@ def generated_test_conversion_modes(): """Returns a list of conversion modes.""" # TODO(nupurgarg): Add "pb2lite" when it's in open source. b/113614050. - return ["toco-extended", ""] + return ["toco-flex", ""] def generated_test_models_all(): """Generates a list of all tests with the different converters. 
@@ -335,7 +335,7 @@ def gen_zip_test(name, test_name, conversion_mode, **kwargs): # TODO(nupurgarg): Comment in when pb2lite is in open source. b/113614050. # if conversion_mode == "pb2lite": # toco = "//tensorflow/contrib/lite/experimental/pb2lite:pb2lite" - flags = "--ignore_toco_errors --run_with_extended" + flags = "--ignore_toco_errors --run_with_flex" kwargs["tags"].append("skip_already_failing") kwargs["tags"].append("no_oss") kwargs["tags"].append("notap") diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index eff6181a61..d50c345194 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -27,7 +27,7 @@ limitations under the License. #ifndef TFLITE_MCU #include "tensorflow/contrib/lite/nnapi_delegate.h" #endif -#if defined(TFLITE_EXTENDED) +#if defined(TFLITE_FLEX) #include "tensorflow/contrib/lite/delegates/flex/delegate.h" #endif #include "tensorflow/contrib/lite/version.h" @@ -450,7 +450,7 @@ TfLiteStatus InterpreterBuilder::operator()( } (**interpreter).SetVariables(std::move(variables)); -#if defined(TFLITE_EXTENDED) +#if defined(TFLITE_FLEX) if (auto delegate = FlexDelegate::Create()) { (**interpreter) .ModifyGraphWithDelegate(std::move(delegate), diff --git a/tensorflow/contrib/lite/python/convert.py b/tensorflow/contrib/lite/python/convert.py index 73a420c47b..613a1530f7 100644 --- a/tensorflow/contrib/lite/python/convert.py +++ b/tensorflow/contrib/lite/python/convert.py @@ -67,12 +67,12 @@ class ConverterMode(enum.Enum): # Convert model using TOCO such that only unsupported operations are # represented as TensorFlow ops. # WARNING: Experimental interface, subject to change. - TOCO_EXTENDED = "TOCO_EXTENDED" + TOCO_FLEX = "TOCO_FLEX" # Convert model using TOCO such that all operations are represented as # TensorFlow ops. # WARNING: Experimental interface, subject to change. 
- TOCO_EXTENDED_ALL = "TOCO_EXTENDED_ALL" + TOCO_FLEX_ALL = "TOCO_FLEX_ALL" def __str__(self): return self.value @@ -240,9 +240,9 @@ def build_toco_convert_protos(input_tensors, if dump_graphviz_dir: toco.dump_graphviz_dir = dump_graphviz_dir toco.dump_graphviz_include_video = dump_graphviz_video - if converter_mode == ConverterMode.TOCO_EXTENDED: + if converter_mode == ConverterMode.TOCO_FLEX: toco.allow_flex_ops = True - elif converter_mode == ConverterMode.TOCO_EXTENDED_ALL: + elif converter_mode == ConverterMode.TOCO_FLEX_ALL: toco.allow_flex_ops = True toco.force_flex_ops = True diff --git a/tensorflow/contrib/lite/python/lite_test.py b/tensorflow/contrib/lite/python/lite_test.py index 7b0df01d1d..d243a494f6 100644 --- a/tensorflow/contrib/lite/python/lite_test.py +++ b/tensorflow/contrib/lite/python/lite_test.py @@ -412,7 +412,7 @@ class FromSessionTest(test_util.TensorFlowTestCase): # Ensure that the quantized weights tflite model is smaller. self.assertTrue(len(quantized_tflite) < len(float_tflite)) - def testExtendedMode(self): + def testFlexMode(self): in_tensor = array_ops.placeholder( shape=[1, 16, 16, 3], dtype=dtypes.float32) out_tensor = in_tensor + in_tensor @@ -421,7 +421,7 @@ class FromSessionTest(test_util.TensorFlowTestCase): # Convert model and ensure model is not None. 
converter = lite.TFLiteConverter.from_session(sess, [in_tensor], [out_tensor]) - converter.converter_mode = lite.ConverterMode.TOCO_EXTENDED_ALL + converter.converter_mode = lite.ConverterMode.TOCO_FLEX_ALL tflite_model = converter.convert() self.assertTrue(tflite_model) diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 53bd88d087..18036fac6f 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -81,9 +81,9 @@ parser.add_argument( action="store_true", help="Include intermediate graphdefs in the output zip files.") parser.add_argument( - "--run_with_extended", + "--run_with_flex", action="store_true", - help="Whether the TFLite Extended converter is being used.") + help="Whether the TFLite Flex converter is being used.") RANDOM_SEED = 342 TEST_INPUT_DEPTH = 3 @@ -339,10 +339,10 @@ def toco_convert(graph_def_str, input_tensors, output_tensors, graphdef_file.flush() # TODO(aselle): Switch this to subprocess at some point. - if "pb2lite" in bin_path and FLAGS.run_with_extended: + if "pb2lite" in bin_path and FLAGS.run_with_flex: opts = ("--input_arrays={0} --output_arrays={1}".format( ",".join(input_arrays), ",".join(output_tensors))) - elif FLAGS.run_with_extended: + elif FLAGS.run_with_flex: opts += " --allow_flex_ops --force_flex_ops" cmd = ("%s --input_file=%s --output_file=%s %s > %s 2>&1" % (bin_path, graphdef_file.name, output_file.name, opts, @@ -3333,7 +3333,7 @@ def main(unused_args): # list of valid conversion modes is defined in # generated_test_conversion_modes() in build_def.bzl. test_function = ("make_%s_tests" % (out.replace(".zip", "").replace( - "pb2lite", "").replace("toco-extended", "").rstrip("_"))) + "pb2lite", "").replace("toco-flex", "").rstrip("_"))) if test_function not in globals(): raise RuntimeError("Can't find a test function to create %r. 
Tried %r" % (out, test_function)) diff --git a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py index 5f3355e734..1498f86c6f 100644 --- a/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py +++ b/tensorflow/contrib/lite/testing/model_coverage/model_coverage_lib_test.py @@ -123,7 +123,7 @@ class EvaluateKerasModel(test.TestCase): keras_file = self._saveKerasModel(model) model_coverage.test_keras_model( - keras_file, converter_mode=lite.ConverterMode.TOCO_EXTENDED) + keras_file, converter_mode=lite.ConverterMode.TOCO_FLEX) if __name__ == '__main__': diff --git a/tensorflow/contrib/lite/tools/benchmark/BUILD b/tensorflow/contrib/lite/tools/benchmark/BUILD index bc18d40313..502e181139 100644 --- a/tensorflow/contrib/lite/tools/benchmark/BUILD +++ b/tensorflow/contrib/lite/tools/benchmark/BUILD @@ -40,7 +40,7 @@ cc_binary( srcs = [ "benchmark_main.cc", ], - copts = common_copts + ["-DTFLITE_EXTENDED"], + copts = common_copts + ["-DTFLITE_FLEX"], linkopts = tflite_linkopts() + select({ "//tensorflow:android": [ "-pie", # Android 5.0 and later supports only PIE @@ -117,7 +117,7 @@ cc_library( "logging.h", ], hdrs = ["benchmark_tflite_model.h"], - copts = common_copts + ["-DTFLITE_EXTENDED"], + copts = common_copts + ["-DTFLITE_FLEX"], deps = [ ":benchmark_model_lib", ":logging", diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc index d989ee720d..463d5993f4 100644 --- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.cc @@ -23,9 +23,9 @@ limitations under the License. 
#include #include -#ifdef TFLITE_EXTENDED +#ifdef TFLITE_FLEX #include "tensorflow/contrib/lite/delegates/flex/delegate.h" -#endif // TFLITE_EXTENDED +#endif // TFLITE_FLEX #include "tensorflow/contrib/lite/kernels/register.h" #include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/op_resolver.h" @@ -305,14 +305,14 @@ void BenchmarkTfLiteModel::Init() { interpreter->UseNNAPI(use_nnapi); -#ifdef TFLITE_EXTENDED +#ifdef TFLITE_FLEX TFLITE_LOG(INFO) << "Instantiating Flex Delegate"; delegate_ = FlexDelegate::Create(); if (delegate_) { interpreter->ModifyGraphWithDelegate(delegate_.get(), /*allow_dynamic_tensors=*/true); } -#endif // TFLITE_EXTENDED +#endif // TFLITE_FLEX auto interpreter_inputs = interpreter->inputs(); diff --git a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h index 9343824b4a..b091e18a29 100644 --- a/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h +++ b/tensorflow/contrib/lite/tools/benchmark/benchmark_tflite_model.h @@ -20,9 +20,9 @@ limitations under the License. 
#include #include -#ifdef TFLITE_EXTENDED +#ifdef TFLITE_FLEX #include "tensorflow/contrib/lite/delegates/flex/delegate.h" -#endif // TFLITE_EXTENDED +#endif // TFLITE_FLEX #include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/profiling/profile_summarizer.h" #include "tensorflow/contrib/lite/tools/benchmark/benchmark_model.h" @@ -73,9 +73,9 @@ class BenchmarkTfLiteModel : public BenchmarkModel { void PrepareInputsAndOutputs() override; private: -#ifdef TFLITE_EXTENDED +#ifdef TFLITE_FLEX std::unique_ptr delegate_; -#endif // TFLITE_EXTENDED +#endif // TFLITE_FLEX std::unique_ptr model; std::unique_ptr interpreter; std::vector inputs; -- GitLab From 0a9ee95ed9c26bef58e9daadcb6935807d90fcd3 Mon Sep 17 00:00:00 2001 From: Yanan Cao Date: Thu, 27 Sep 2018 17:04:17 -0700 Subject: [PATCH 111/570] Disable summary ops from lower-level xla.compile API rather than xla.estimator_model_fn PiperOrigin-RevId: 214860981 --- tensorflow/contrib/compiler/xla.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/compiler/xla.py b/tensorflow/contrib/compiler/xla.py index 1e30525159..873b03580d 100644 --- a/tensorflow/contrib/compiler/xla.py +++ b/tensorflow/contrib/compiler/xla.py @@ -293,7 +293,8 @@ def _compile_internal(computation, inputs=None): saved_use_resource = vscope.use_resource vscope.set_use_resource(True) - outputs = computation(*computation_inputs) + with _disable_summary_context(): + outputs = computation(*computation_inputs) # Restore variable scope after computation. vscope.set_use_resource(saved_use_resource) @@ -371,13 +372,13 @@ def _disable_summary_context(): Yields: None. 
""" - origional_skip_summary_func = summary_op_util.skip_summary + original_skip_summary_func = summary_op_util.skip_summary summary_op_util.skip_summary = lambda: True try: yield finally: - summary_op_util.skip_summary = origional_skip_summary_func + summary_op_util.skip_summary = original_skip_summary_func class _CapturedObject(object): @@ -436,8 +437,7 @@ class _ModelFnWrapper(object): if mode == model_fn_lib.ModeKeys.TRAIN: train_step, captured_scaffold_fn = self._make_train_step( features, labels, params) - with _disable_summary_context(): - (loss,) = compile(train_step) + (loss,) = compile(train_step) return model_fn_lib.EstimatorSpec( mode=mode, loss=loss, @@ -446,8 +446,7 @@ class _ModelFnWrapper(object): elif mode == model_fn_lib.ModeKeys.EVAL: eval_step, captured_eval_metric_fn, captured_scaffold_fn = ( self._make_eval_step(features, labels, params)) - with _disable_summary_context(): - outputs = compile(eval_step) + outputs = compile(eval_step) loss = outputs[0] # Calculate eval_metric_ops if eval_metric_fn is set and captured. -- GitLab From 9b8390e7cd664d8fad9dd3f7172a56135585b481 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Thu, 27 Sep 2018 17:04:19 -0700 Subject: [PATCH 112/570] Remove testing non-core APIs from api_compatibility_test. Some APIs are moving out of core TF repo. These APIs will have their own API compat tests. Adding flag --only_test_core_api=true which will not check for changes to non-core APIs. 
PiperOrigin-RevId: 214860984 --- tensorflow/tools/api/tests/BUILD | 1 + .../tools/api/tests/api_compatibility_test.py | 39 +++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/tensorflow/tools/api/tests/BUILD b/tensorflow/tools/api/tests/BUILD index 4efa4a9651..3cbea41dca 100644 --- a/tensorflow/tools/api/tests/BUILD +++ b/tensorflow/tools/api/tests/BUILD @@ -19,6 +19,7 @@ py_test( "api_compatibility_test.py", "//tensorflow:tf_python_api_gen_v2", ], + args = ["--only_test_core_api=true"], data = [ "//tensorflow/tools/api/golden:api_golden_v1", "//tensorflow/tools/api/golden:api_golden_v2", diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index d06c7f2d49..6487a6267e 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -56,6 +56,14 @@ _UPDATE_GOLDENS_HELP = """ have to be authorized by TensorFlow leads. """ +# DEFINE_boolean, only_test_core_api, default False: +_ONLY_TEST_CORE_API_HELP = """ + Some TF APIs are being moved outside of the tensorflow/ directory. There is + no garuntee which versions of these APIs will be present when running this + test. Therefore, do not error out on API changes in non-core TF code + if this flag is set. +""" + # DEFINE_boolean, verbose_diffs, default True: _VERBOSE_DIFFS_HELP = """ If set to true, print line by line diffs on all libraries. If set to @@ -67,6 +75,8 @@ _API_GOLDEN_FOLDER_V2 = 'tensorflow/tools/api/golden/v2' _TEST_README_FILE = 'tensorflow/tools/api/tests/README.txt' _UPDATE_WARNING_FILE = 'tensorflow/tools/api/tests/API_UPDATE_WARNING.txt' +_NON_CORE_PACKAGES = ['estimator'] + def _KeyToFilePath(key, api_version): """From a given key, construct a filepath. @@ -111,6 +121,19 @@ def _VerifyNoSubclassOfMessageVisitor(path, parent, unused_children): 'They are not yet supported by the API tools.' 
% path) +def _FilterNonCoreGoldenFiles(golden_file_list): + """Filter out non-core API pbtxt files.""" + filtered_file_list = [] + filtered_package_prefixes = [ + 'tensorflow.%s.' % p for p in _NON_CORE_PACKAGES] + for f in golden_file_list: + if any([f.rsplit('/')[-1].startswith(pre) + for pre in filtered_package_prefixes]): + continue + filtered_file_list.append(f) + return filtered_file_list + + class ApiCompatibilityTest(test.TestCase): def __init__(self, *args, **kwargs): @@ -233,6 +256,9 @@ class ApiCompatibilityTest(test.TestCase): return visitor = public_api.PublicAPIVisitor(_VerifyNoSubclassOfMessageVisitor) visitor.do_not_descend_map['tf'].append('contrib') + if FLAGS.only_test_core_api: + visitor.do_not_descend_map['tf'].extend( + _NON_CORE_PACKAGES) traverse.traverse(tf_v2.compat.v1, visitor) def testNoSubclassOfMessageV2(self): @@ -240,6 +266,9 @@ class ApiCompatibilityTest(test.TestCase): return visitor = public_api.PublicAPIVisitor(_VerifyNoSubclassOfMessageVisitor) visitor.do_not_descend_map['tf'].append('contrib') + if FLAGS.only_test_core_api: + visitor.do_not_descend_map['tf'].extend( + _NON_CORE_PACKAGES) traverse.traverse(tf_v2, visitor) def _checkBackwardsCompatibility( @@ -252,6 +281,9 @@ class ApiCompatibilityTest(test.TestCase): public_api_visitor.do_not_descend_map['tf'].append('contrib') public_api_visitor.do_not_descend_map['tf.GPUOptions'] = [ 'Experimental'] + if FLAGS.only_test_core_api: + public_api_visitor.do_not_descend_map['tf'].extend( + _NON_CORE_PACKAGES) if additional_private_map: public_api_visitor.private_map.update(additional_private_map) @@ -260,6 +292,8 @@ class ApiCompatibilityTest(test.TestCase): # Read all golden files. 
golden_file_list = file_io.get_matching_files(golden_file_pattern) + if FLAGS.only_test_core_api: + golden_file_list = _FilterNonCoreGoldenFiles(golden_file_list) def _ReadFileToProto(filename): """Read a filename, create a protobuf from its contents.""" @@ -325,6 +359,11 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument( '--update_goldens', type=bool, default=False, help=_UPDATE_GOLDENS_HELP) + # TODO(mikecase): Create Estimator's own API compatibility test or + # a more general API compatibility test for use for TF components. + parser.add_argument( + '--only_test_core_api', type=bool, default=False, + help=_ONLY_TEST_CORE_API_HELP) parser.add_argument( '--verbose_diffs', type=bool, default=True, help=_VERBOSE_DIFFS_HELP) FLAGS, unparsed = parser.parse_known_args() -- GitLab From 7fbc44d63b25eddfc384922809426319728f949c Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Thu, 27 Sep 2018 17:19:17 -0700 Subject: [PATCH 113/570] [Java]: Release 1.11.0 PiperOrigin-RevId: 214862838 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/spark-tensorflow-connector/pom.xml | 2 +- tensorflow/java/maven/tensorflow-hadoop/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index 9fc6969c20..6b3e305e5d 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml index 68712082e1..f130515934 100644 --- 
a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index f031173c99..67ecc2d597 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 2cac27990e..8ba859da01 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 8a93091276..dcd654d713 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 ../ proto diff --git a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml index 014bd8d212..45214f834c 100644 --- a/tensorflow/java/maven/spark-tensorflow-connector/pom.xml +++ b/tensorflow/java/maven/spark-tensorflow-connector/pom.xml @@ -6,7 +6,7 @@ org.tensorflow spark-tensorflow-connector_2.11 jar - 1.11.0-rc2 + 1.11.0 spark-tensorflow-connector https://www.tensorflow.org TensorFlow TFRecord connector for Apache Spark DataFrames diff --git a/tensorflow/java/maven/tensorflow-hadoop/pom.xml b/tensorflow/java/maven/tensorflow-hadoop/pom.xml index d07c5fcd98..a8669ee72b 100644 --- a/tensorflow/java/maven/tensorflow-hadoop/pom.xml +++ b/tensorflow/java/maven/tensorflow-hadoop/pom.xml @@ -5,7 +5,7 @@ org.tensorflow tensorflow-hadoop jar - 1.11.0-rc2 + 1.11.0 tensorflow-hadoop https://www.tensorflow.org TensorFlow TFRecord 
InputFormat/OutputFormat for Apache Hadoop diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index af0c68a4ed..67d628ba11 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.11.0-rc2 + 1.11.0 ../ tensorflow -- GitLab From f7e5a4e5f1de355cbbe70215f08d962e027cd0dc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 17:20:54 -0700 Subject: [PATCH 114/570] Update ops-related pbtxt files. PiperOrigin-RevId: 214863042 --- .../core/ops/compat/ops_history.v1.pbtxt | 53 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 53 +++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 7625524674..32ce31cf23 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -44855,6 +44855,59 @@ op { } is_stateful: true } +op { + name: "ReduceDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "components" + type_list_attr: "output_types" + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "use_inter_op_parallelism" + type: "bool" + default_value { + b: true + } + } +} op { name: "ReduceJoin" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 83af07431c..02a7f8d717 100644 --- 
a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -22868,6 +22868,59 @@ op { } is_stateful: true } +op { + name: "ReduceDataset" + input_arg { + name: "input_dataset" + type: DT_VARIANT + } + input_arg { + name: "initial_state" + type_list_attr: "Tstate" + } + input_arg { + name: "other_arguments" + type_list_attr: "Targuments" + } + output_arg { + name: "components" + type_list_attr: "output_types" + } + attr { + name: "f" + type: "func" + } + attr { + name: "Tstate" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Targuments" + type: "list(type)" + has_minimum: true + } + attr { + name: "output_types" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "output_shapes" + type: "list(shape)" + has_minimum: true + minimum: 1 + } + attr { + name: "use_inter_op_parallelism" + type: "bool" + default_value { + b: true + } + } +} op { name: "ReduceJoin" input_arg { -- GitLab From c1f557705143f69988ec272f2cf659c7d525974c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 17:45:56 -0700 Subject: [PATCH 115/570] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 214866490 --- tensorflow/go/op/wrappers.go | 508 +++++++++++++++++------------------ 1 file changed, 254 insertions(+), 254 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 96df1eee30..2f297d5161 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -26837,6 +26837,260 @@ func DecodeGif(scope *Scope, contents tf.Output) (image tf.Output) { return op.Output(0) } +// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler. +type LearnedUnigramCandidateSamplerAttr func(optionalAttr) + +// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value. +// +// value: If either seed or seed2 are set to be non-zero, the random number +// generator is seeded by the given seed. 
Otherwise, it is seeded by a +// random seed. +// If not specified, defaults to 0 +func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. +// +// value: An second seed to avoid seed collision. +// If not specified, defaults to 0 +func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// Generates labels for candidate sampling with a learned unigram distribution. +// +// See explanations of candidate sampling and the data formats at +// go/candidate-sampling. +// +// For each batch, this op picks a single set of sampled candidate labels. +// +// The advantages of sampling candidates per-batch are simplicity and the +// possibility of efficient dense matrix multiplication. The disadvantage is that +// the sampled candidates must be chosen independently of the context and of the +// true labels. +// +// Arguments: +// true_classes: A batch_size * num_true matrix, in which each row contains the +// IDs of the num_true target_classes in the corresponding original label. +// num_true: Number of true labels per context. +// num_sampled: Number of candidates to randomly sample. +// unique: If unique is true, we sample with rejection, so that all sampled +// candidates in a batch are unique. This requires some approximation to +// estimate the post-rejection sampling probabilities. +// range_max: The sampler will sample integers from the interval [0, range_max). +// +// Returns A vector of length num_sampled, in which each element is +// the ID of a sampled candidate.A batch_size * num_true matrix, representing +// the number of times each candidate is expected to occur in a batch +// of sampled candidates. 
If unique=true, then this is a probability.A vector of length num_sampled, for each sampled +// candidate representing the number of times the candidate is expected +// to occur in a batch of sampled candidates. If unique=true, then this is a +// probability. +func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "LearnedUnigramCandidateSampler", + Input: []tf.Input{ + true_classes, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1), op.Output(2) +} + +// SerializeSparseAttr is an optional argument to SerializeSparse. +type SerializeSparseAttr func(optionalAttr) + +// SerializeSparseOutType sets the optional out_type attribute to value. +// +// value: The `dtype` to use for serialization; the supported types are `string` +// (default) and `variant`. +// If not specified, defaults to DT_STRING +func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { + return func(m optionalAttr) { + m["out_type"] = value + } +} + +// Serialize a `SparseTensor` into a `[3]` `Tensor` object. +// +// Arguments: +// sparse_indices: 2-D. The `indices` of the `SparseTensor`. +// sparse_values: 1-D. The `values` of the `SparseTensor`. +// sparse_shape: 1-D. The `shape` of the `SparseTensor`. 
+func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SerializeSparse", + Input: []tf.Input{ + sparse_indices, sparse_values, sparse_shape, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2. +type RandomShuffleQueueV2Attr func(optionalAttr) + +// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value. +// +// value: The shape of each component in a value. The length of this attr must +// be either 0 or the same as the length of component_types. If the length of +// this attr is 0, the shapes of queue elements are not constrained, and +// only one element may be dequeued at a time. +// If not specified, defaults to <> +// +// REQUIRES: len(value) >= 0 +func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["shapes"] = value + } +} + +// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value. +// +// value: The upper bound on the number of elements in this queue. +// Negative numbers mean no limit. +// If not specified, defaults to -1 +func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value. +// +// value: Dequeue will block unless there would be this +// many elements after the dequeue or the queue is closed. This +// ensures a minimum level of mixing of elements. 
+// If not specified, defaults to 0 +func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["min_after_dequeue"] = value + } +} + +// RandomShuffleQueueV2Seed sets the optional seed attribute to value. +// +// value: If either seed or seed2 is set to be non-zero, the random number +// generator is seeded by the given seed. Otherwise, a random seed is used. +// If not specified, defaults to 0 +func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["seed"] = value + } +} + +// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value. +// +// value: A second seed to avoid seed collision. +// If not specified, defaults to 0 +func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["seed2"] = value + } +} + +// RandomShuffleQueueV2Container sets the optional container attribute to value. +// +// value: If non-empty, this queue is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this queue will be shared under the given name +// across multiple sessions. +// If not specified, defaults to "" +func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// A queue that randomizes the order of elements. +// +// Arguments: +// component_types: The type of each component in a value. +// +// Returns The handle to the queue. 
+func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"component_types": component_types} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "RandomShuffleQueueV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Draw bounding boxes on a batch of images. +// +// Outputs a copy of `images` but draws on top of the pixels zero or more bounding +// boxes specified by the locations in `boxes`. The coordinates of the each +// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The +// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and +// height of the underlying image. +// +// For example, if an image is 100 x 200 pixels (height x width) and the bounding +// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of +// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). +// +// Parts of the bounding box may fall outside the image. +// +// Arguments: +// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. +// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding +// boxes. +// +// Returns 4-D with the same shape as `images`. The batch of input images with +// bounding boxes drawn on the images. +func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "DrawBoundingBoxes", + Input: []tf.Input{ + images, boxes, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Gets the next output from the given iterator. // // This operation is a synchronous version IteratorGetNext. 
It should only be used @@ -30988,260 +31242,6 @@ func TensorArraySplitV3(scope *Scope, handle tf.Output, value tf.Output, lengths return op.Output(0) } -// SerializeSparseAttr is an optional argument to SerializeSparse. -type SerializeSparseAttr func(optionalAttr) - -// SerializeSparseOutType sets the optional out_type attribute to value. -// -// value: The `dtype` to use for serialization; the supported types are `string` -// (default) and `variant`. -// If not specified, defaults to DT_STRING -func SerializeSparseOutType(value tf.DataType) SerializeSparseAttr { - return func(m optionalAttr) { - m["out_type"] = value - } -} - -// Serialize a `SparseTensor` into a `[3]` `Tensor` object. -// -// Arguments: -// sparse_indices: 2-D. The `indices` of the `SparseTensor`. -// sparse_values: 1-D. The `values` of the `SparseTensor`. -// sparse_shape: 1-D. The `shape` of the `SparseTensor`. -func SerializeSparse(scope *Scope, sparse_indices tf.Output, sparse_values tf.Output, sparse_shape tf.Output, optional ...SerializeSparseAttr) (serialized_sparse tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SerializeSparse", - Input: []tf.Input{ - sparse_indices, sparse_values, sparse_shape, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomShuffleQueueV2Attr is an optional argument to RandomShuffleQueueV2. -type RandomShuffleQueueV2Attr func(optionalAttr) - -// RandomShuffleQueueV2Shapes sets the optional shapes attribute to value. -// -// value: The shape of each component in a value. The length of this attr must -// be either 0 or the same as the length of component_types. If the length of -// this attr is 0, the shapes of queue elements are not constrained, and -// only one element may be dequeued at a time. 
-// If not specified, defaults to <> -// -// REQUIRES: len(value) >= 0 -func RandomShuffleQueueV2Shapes(value []tf.Shape) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shapes"] = value - } -} - -// RandomShuffleQueueV2Capacity sets the optional capacity attribute to value. -// -// value: The upper bound on the number of elements in this queue. -// Negative numbers mean no limit. -// If not specified, defaults to -1 -func RandomShuffleQueueV2Capacity(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// RandomShuffleQueueV2MinAfterDequeue sets the optional min_after_dequeue attribute to value. -// -// value: Dequeue will block unless there would be this -// many elements after the dequeue or the queue is closed. This -// ensures a minimum level of mixing of elements. -// If not specified, defaults to 0 -func RandomShuffleQueueV2MinAfterDequeue(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["min_after_dequeue"] = value - } -} - -// RandomShuffleQueueV2Seed sets the optional seed attribute to value. -// -// value: If either seed or seed2 is set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, a random seed is used. -// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// RandomShuffleQueueV2Seed2 sets the optional seed2 attribute to value. -// -// value: A second seed to avoid seed collision. -// If not specified, defaults to 0 -func RandomShuffleQueueV2Seed2(value int64) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// RandomShuffleQueueV2Container sets the optional container attribute to value. -// -// value: If non-empty, this queue is placed in the given container. -// Otherwise, a default container is used. 
-// If not specified, defaults to "" -func RandomShuffleQueueV2Container(value string) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// RandomShuffleQueueV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this queue will be shared under the given name -// across multiple sessions. -// If not specified, defaults to "" -func RandomShuffleQueueV2SharedName(value string) RandomShuffleQueueV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// A queue that randomizes the order of elements. -// -// Arguments: -// component_types: The type of each component in a value. -// -// Returns The handle to the queue. -func RandomShuffleQueueV2(scope *Scope, component_types []tf.DataType, optional ...RandomShuffleQueueV2Attr) (handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"component_types": component_types} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "RandomShuffleQueueV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Draw bounding boxes on a batch of images. -// -// Outputs a copy of `images` but draws on top of the pixels zero or more bounding -// boxes specified by the locations in `boxes`. The coordinates of the each -// bounding box in `boxes` are encoded as `[y_min, x_min, y_max, x_max]`. The -// bounding box coordinates are floats in `[0.0, 1.0]` relative to the width and -// height of the underlying image. -// -// For example, if an image is 100 x 200 pixels (height x width) and the bounding -// box is `[0.1, 0.2, 0.5, 0.9]`, the upper-left and bottom-right coordinates of -// the bounding box will be `(40, 10)` to `(180, 50)` (in (x,y) coordinates). -// -// Parts of the bounding box may fall outside the image. -// -// Arguments: -// images: 4-D with shape `[batch, height, width, depth]`. A batch of images. 
-// boxes: 3-D with shape `[batch, num_bounding_boxes, 4]` containing bounding -// boxes. -// -// Returns 4-D with the same shape as `images`. The batch of input images with -// bounding boxes drawn on the images. -func DrawBoundingBoxes(scope *Scope, images tf.Output, boxes tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "DrawBoundingBoxes", - Input: []tf.Input{ - images, boxes, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// LearnedUnigramCandidateSamplerAttr is an optional argument to LearnedUnigramCandidateSampler. -type LearnedUnigramCandidateSamplerAttr func(optionalAttr) - -// LearnedUnigramCandidateSamplerSeed sets the optional seed attribute to value. -// -// value: If either seed or seed2 are set to be non-zero, the random number -// generator is seeded by the given seed. Otherwise, it is seeded by a -// random seed. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed(value int64) LearnedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed"] = value - } -} - -// LearnedUnigramCandidateSamplerSeed2 sets the optional seed2 attribute to value. -// -// value: An second seed to avoid seed collision. -// If not specified, defaults to 0 -func LearnedUnigramCandidateSamplerSeed2(value int64) LearnedUnigramCandidateSamplerAttr { - return func(m optionalAttr) { - m["seed2"] = value - } -} - -// Generates labels for candidate sampling with a learned unigram distribution. -// -// See explanations of candidate sampling and the data formats at -// go/candidate-sampling. -// -// For each batch, this op picks a single set of sampled candidate labels. -// -// The advantages of sampling candidates per-batch are simplicity and the -// possibility of efficient dense matrix multiplication. The disadvantage is that -// the sampled candidates must be chosen independently of the context and of the -// true labels. 
-// -// Arguments: -// true_classes: A batch_size * num_true matrix, in which each row contains the -// IDs of the num_true target_classes in the corresponding original label. -// num_true: Number of true labels per context. -// num_sampled: Number of candidates to randomly sample. -// unique: If unique is true, we sample with rejection, so that all sampled -// candidates in a batch are unique. This requires some approximation to -// estimate the post-rejection sampling probabilities. -// range_max: The sampler will sample integers from the interval [0, range_max). -// -// Returns A vector of length num_sampled, in which each element is -// the ID of a sampled candidate.A batch_size * num_true matrix, representing -// the number of times each candidate is expected to occur in a batch -// of sampled candidates. If unique=true, then this is a probability.A vector of length num_sampled, for each sampled -// candidate representing the number of times the candidate is expected -// to occur in a batch of sampled candidates. If unique=true, then this is a -// probability. -func LearnedUnigramCandidateSampler(scope *Scope, true_classes tf.Output, num_true int64, num_sampled int64, unique bool, range_max int64, optional ...LearnedUnigramCandidateSamplerAttr) (sampled_candidates tf.Output, true_expected_count tf.Output, sampled_expected_count tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_true": num_true, "num_sampled": num_sampled, "unique": unique, "range_max": range_max} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "LearnedUnigramCandidateSampler", - Input: []tf.Input{ - true_classes, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1), op.Output(2) -} - // Computes gradients for the scaled exponential linear (Selu) operation. 
// // Arguments: -- GitLab From 4bab3e375b7fffbc8878313089a2bd680952aced Mon Sep 17 00:00:00 2001 From: Sourabh Bajaj Date: Thu, 27 Sep 2018 17:54:44 -0700 Subject: [PATCH 116/570] Change test size as it has been timing out consistently PiperOrigin-RevId: 214867453 --- tensorflow/contrib/distribute/python/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/distribute/python/BUILD b/tensorflow/contrib/distribute/python/BUILD index 7eead6e472..e329b964c4 100644 --- a/tensorflow/contrib/distribute/python/BUILD +++ b/tensorflow/contrib/distribute/python/BUILD @@ -453,7 +453,7 @@ cuda_py_test( cuda_py_test( name = "estimator_training_test", - size = "large", + size = "enormous", srcs = ["estimator_training_test.py"], additional_deps = [ ":combinations", -- GitLab From 96f3428e33e18477661b8d8cf78f2db457c8881b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 18:43:55 -0700 Subject: [PATCH 117/570] Let feature columns correctly handle rank-1 sparse tensors from an empty batch. reshape can't determine the size of the last dimension when reshaping shape (0) to (0, 1). PiperOrigin-RevId: 214872677 --- .../python/feature_column/feature_column.py | 2 +- .../python/feature_column/feature_column_test.py | 12 ++++++++++++ .../python/feature_column/feature_column_v2.py | 2 +- .../feature_column/feature_column_v2_test.py | 16 ++++++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index 226e273660..618e70f3a5 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -2318,7 +2318,7 @@ class _LazyBuilder(object): # Input_tensor must have rank 1. 
if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): return sparse_ops.sparse_reshape( - input_tensor, [array_ops.shape(input_tensor)[0], -1]) + input_tensor, [array_ops.shape(input_tensor)[0], 1]) else: return array_ops.expand_dims(input_tensor, -1) diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index abb79efa68..1ae510250c 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -169,6 +169,18 @@ class LazyColumnTest(test.TestCase): TypeError, '"key" must be either a "str" or "_FeatureColumn".'): builder.get(NotAFeatureColumn()) + def test_expand_dim_rank_1_sparse_tensor_empty_batch(self): + # empty 1-D sparse tensor: + builder = _LazyBuilder(features={'a': sparse_tensor.SparseTensor( + indices=np.reshape(np.array([], dtype=np.int64), (0, 1)), + dense_shape=[0], + values=np.array([]))}) + with self.cached_session(): + spv = builder.get('a').eval() + self.assertAllEqual(np.array([0, 1], dtype=np.int64), spv.dense_shape) + self.assertAllEqual( + np.reshape(np.array([], dtype=np.int64), (0, 2)), spv.indices) + class NumericColumnTest(test.TestCase): diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index 289f6d0d14..538641c251 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -2341,7 +2341,7 @@ class FeatureTransformationCache(object): # Input_tensor must have rank 1. 
if isinstance(input_tensor, sparse_tensor_lib.SparseTensor): return sparse_ops.sparse_reshape( - input_tensor, [array_ops.shape(input_tensor)[0], -1]) + input_tensor, [array_ops.shape(input_tensor)[0], 1]) else: return array_ops.expand_dims(input_tensor, -1) diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 58168e0f9e..2970431167 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -177,6 +177,22 @@ class LazyColumnTest(test.TestCase): TypeError, '"key" must be either a "str" or "FeatureColumn".'): transformation_cache.get(NotAFeatureColumn(), None) + def test_expand_dim_rank_1_sparse_tensor_empty_batch(self): + # empty 1-D sparse tensor: + transformation_cache = FeatureTransformationCache( + features={ + 'a': + sparse_tensor.SparseTensor( + indices=np.reshape(np.array([], dtype=np.int64), (0, 1)), + dense_shape=[0], + values=np.array([])) + }) + with self.cached_session(): + spv = transformation_cache.get('a', None).eval() + self.assertAllEqual(np.array([0, 1], dtype=np.int64), spv.dense_shape) + self.assertAllEqual( + np.reshape(np.array([], dtype=np.int64), (0, 2)), spv.indices) + class NumericColumnTest(test.TestCase): -- GitLab From 70f071f7afb2deffddbd9937d7a76b1e1c0b2b75 Mon Sep 17 00:00:00 2001 From: Revan Sopher Date: Thu, 27 Sep 2018 19:20:59 -0700 Subject: [PATCH 118/570] Fix failing test. 
PiperOrigin-RevId: 214875840 --- .../estimator_batch/dnn_tree_combined_estimator_test.py | 3 ++- .../contrib/boosted_trees/estimator_batch/estimator_test.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py index 04baa329a0..6b6fe9663a 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/dnn_tree_combined_estimator_test.py @@ -188,7 +188,8 @@ class CoreDNNBoostedTreeCombinedTest(test_util.TensorFlowTestCase): # Train for a few steps. est.train(input_fn=_train_input_fn, steps=1000) - # 10 steps for dnn, 3 for 1 tree of depth 3 + 1 after the tree finished + # 10 steps for dnn + 3 for 1 tree of depth 3 + 1 after the tree finished + # + 1 for resource variables. self._assert_checkpoint(est.model_dir, global_step=15) res = est.evaluate(input_fn=_eval_input_fn, steps=1) self.assertLess(0.5, res["auc"]) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py index c155128c0e..d7b14e00ba 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator_test.py @@ -238,8 +238,8 @@ class BoostedTreeEstimatorTest(test_util.TensorFlowTestCase): output_leaf_index=False) classifier.fit(input_fn=_train_input_fn, steps=15) - # When no override of global steps, 5 steps were used. - self._assert_checkpoint(classifier.model_dir, global_step=5) + # When no override of global steps, 6 steps were used. 
+ self._assert_checkpoint(classifier.model_dir, global_step=6) def testOverridesGlobalSteps(self): learner_config = learner_pb2.LearnerConfig() -- GitLab From acb13e448786838feb500973f51279dc90eeab50 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Thu, 27 Sep 2018 20:01:16 -0700 Subject: [PATCH 119/570] Fix visibility PiperOrigin-RevId: 214878220 --- tensorflow/tools/docs/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/tools/docs/BUILD b/tensorflow/tools/docs/BUILD index b218e900bf..2a858b4fd6 100644 --- a/tensorflow/tools/docs/BUILD +++ b/tensorflow/tools/docs/BUILD @@ -37,6 +37,7 @@ py_library( name = "doc_controls", srcs = ["doc_controls.py"], srcs_version = "PY2AND3", + visibility = ["//visibility:public"], ) py_test( -- GitLab From a309e136dcfdd13dc8e8eb7570b6c5945bb6f967 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 20:02:51 -0700 Subject: [PATCH 120/570] Keras Lambda - enhancements to output_shape computation PiperOrigin-RevId: 214878428 --- tensorflow/python/keras/layers/core.py | 51 +++++++++++++++------ tensorflow/python/keras/layers/core_test.py | 45 ++++++++++++++++++ 2 files changed, 82 insertions(+), 14 deletions(-) diff --git a/tensorflow/python/keras/layers/core.py b/tensorflow/python/keras/layers/core.py index 4032202986..efa21955e6 100644 --- a/tensorflow/python/keras/layers/core.py +++ b/tensorflow/python/keras/layers/core.py @@ -671,22 +671,34 @@ class Lambda(Layer): if mask is not None: self.supports_masking = True self.mask = mask - if output_shape is None: - self._output_shape = None - elif isinstance(output_shape, (tuple, list)): - self._output_shape = tuple(output_shape) - else: - if not callable(output_shape): - raise TypeError('In Lambda, `output_shape` ' - 'must be a list, a tuple, or a function.') - self._output_shape = output_shape + if (output_shape is not None and not isinstance(output_shape, + (tuple, list)) and + not callable(output_shape)): + raise TypeError('In Lambda, 
`output_shape` ' + 'must be a list, a tuple, or a function.') + # Convert a list representing a single shape into a tuple. + if (isinstance(output_shape, list) and isinstance(output_shape[0], + (int, type(None)))): + output_shape = tuple(output_shape) + self._output_shape = output_shape @tf_utils.shape_type_conversion def compute_output_shape(self, input_shape): if self._output_shape is None: if context.executing_eagerly(): - raise NotImplementedError - x = K.placeholder(shape=input_shape) + # Make use of existing autocomputation for Eager mode but provide + # Lambda-specific error message. + try: + return super(Lambda, self).compute_output_shape(input_shape) + except NotImplementedError: + raise NotImplementedError('We could not automatically infer ' + 'the static shape of the Lambda\'s output.' + ' Please specify the `output_shape` for' + ' this Lambda.') + if isinstance(input_shape, list): + x = [K.placeholder(shape=shape) for shape in input_shape] + else: + x = K.placeholder(shape=input_shape) x = self.call(x) if isinstance(x, list): return [tensor_shape.TensorShape(K.int_shape(x_elem)) for x_elem in x] @@ -697,16 +709,27 @@ class Lambda(Layer): num_samples = input_shape[0][0] else: num_samples = input_shape[0] if input_shape else None - return tensor_shape.TensorShape((num_samples,) + - tuple(self._output_shape)) + # List here represents multiple outputs. + if isinstance(self._output_shape, list): + return [ + tensor_shape.TensorShape((num_samples,) + tuple(single_shape)) + for single_shape in self._output_shape + ] + return tensor_shape.TensorShape((num_samples,) + self._output_shape) else: shape = self._output_shape(input_shape) if not isinstance(shape, (list, tuple)): raise ValueError( '`output_shape` function must return a tuple or a list of tuples.') + # List here can represent multiple outputs or single output. if isinstance(shape, list): - if isinstance(shape[0], int) or shape[0] is None: + # Convert list representing single output into a tuple. 
+ if isinstance(shape[0], (int, type(None))): shape = tuple(shape) + else: + return [ + tensor_shape.TensorShape(single_shape) for single_shape in shape + ] return tensor_shape.TensorShape(shape) def call(self, inputs, mask=None): diff --git a/tensorflow/python/keras/layers/core_test.py b/tensorflow/python/keras/layers/core_test.py index 1df1d575b1..f0fea1f65c 100644 --- a/tensorflow/python/keras/layers/core_test.py +++ b/tensorflow/python/keras/layers/core_test.py @@ -252,6 +252,51 @@ class CoreLayersTest(test.TestCase): l(keras.backend.variable(np.ones((1, 1)))) self.assertEqual('lambda', l.get_config()['output_shape_type']) + @tf_test_util.run_in_graph_and_eager_modes + def test_lambda_output_shape_autocalculate_multiple_inputs(self): + + def lambda_fn(x): + return math_ops.matmul(x[0], x[1]) + + l = keras.layers.Lambda(lambda_fn) + output_shape = l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual((10, 20), output_shape) + + @tf_test_util.run_in_graph_and_eager_modes + def test_lambda_output_shape_list_multiple_outputs(self): + + def lambda_fn(x): + return x + + l = keras.layers.Lambda(lambda_fn, output_shape=[(10,), (20,)]) + output_shape = l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual([(10, 10), (10, 20)], output_shape) + + @tf_test_util.run_in_graph_and_eager_modes + def test_lambda_output_shape_tuple_with_none(self): + + def lambda_fn(x): + return x + + l = keras.layers.Lambda(lambda_fn, output_shape=(None, 10)) + output_shape = l.compute_output_shape((5, 10, 20)) + # Dimension(None) != Dimension(None), so check + # str representations for equality. 
+ self.assertAllEqual(('5', '?', '10'), tuple([str(s) for s in output_shape])) + + @tf_test_util.run_in_graph_and_eager_modes + def test_lambda_output_shape_function_multiple_outputs(self): + + def lambda_fn(x): + return x + + def output_shape_fn(input_shape): + return input_shape + + l = keras.layers.Lambda(lambda_fn, output_shape=output_shape_fn) + output_shape = l.compute_output_shape([(10, 10), (10, 20)]) + self.assertAllEqual([(10, 10), (10, 20)], output_shape) + def test_lambda_config_serialization(self): with self.cached_session(): # test serialization with output_shape and output_shape_type -- GitLab From d377fdee3a5e266ac330a6742c15ece8e7ed8aa0 Mon Sep 17 00:00:00 2001 From: Daryl Ng Date: Thu, 27 Sep 2018 20:10:31 -0700 Subject: [PATCH 121/570] Adding to tpu_lib depenencies to optimization_parameters_py, tpu_embedding_configuration_py, and tpu_embedding_output_layout_py. PiperOrigin-RevId: 214879168 --- tensorflow/contrib/tpu/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index ac38612603..e9aa037634 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -252,7 +252,10 @@ py_library( ":tpu_py", "//tensorflow/contrib/cluster_resolver:tpu_cluster_resolver_py", "//tensorflow/contrib/tpu/proto:compilation_result_proto_py", + "//tensorflow/contrib/tpu/proto:optimization_parameters_proto_py", "//tensorflow/contrib/tpu/proto:topology_proto_py", + "//tensorflow/contrib/tpu/proto:tpu_embedding_configuration_proto_py", + "//tensorflow/contrib/tpu/proto:tpu_embedding_output_layout_proto_py", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", -- GitLab From 986193d79e00f1780fb3278ed890a72f7285f66e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 20:14:30 -0700 Subject: [PATCH 122/570] Move obsolete kernel code to legacy files. 
PiperOrigin-RevId: 214879388 --- .../internal/optimized/depthwiseconv_float.h | 74 -- .../internal/optimized/depthwiseconv_uint8.h | 102 -- .../internal/optimized/legacy_optimized_ops.h | 941 ++++++++++++++- .../internal/optimized/optimized_ops.h | 798 ------------ .../internal/reference/depthwiseconv_float.h | 75 -- .../internal/reference/depthwiseconv_uint8.h | 103 -- .../internal/reference/fully_connected.h | 134 --- .../internal/reference/legacy_reference_ops.h | 1067 ++++++++++++++++- .../internal/reference/reference_ops.h | 762 ------------ .../lite/kernels/internal/reference/softmax.h | 23 - 10 files changed, 2001 insertions(+), 2078 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h index 114575a96a..d8dd7bba89 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h @@ -1092,80 +1092,6 @@ inline void DepthwiseConv( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - tflite::DepthwiseParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.depth_multiplier = depth_multiplier; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - DepthwiseConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride_width, stride_height, 1, 1, pad_width, - pad_height, depth_multiplier, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride_width, stride_height, pad_width, pad_height, - depth_multiplier, output_activation_min, output_activation_max, - output_data, output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int depth_multiplier, - float* output_data, const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride, stride, pad_width, pad_height, - depth_multiplier, output_data, output_dims); -} - } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index a70545599b..803eff292a 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -24,9 +24,6 @@ limitations under the License. namespace tflite { namespace optimized_ops { -// TODO(b/80418076): Move to legacy ops file, along with invocations. 
-static constexpr int kDepthwiseReverseShift = -1; - // Implementation of quantized DepthwiseConv template @@ -1996,105 +1993,6 @@ inline void DepthwiseConv( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - tflite::DepthwiseParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.depth_multiplier = depth_multiplier; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
- op_params.output_shift = kDepthwiseReverseShift * output_shift; - - DepthwiseConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); -} - -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, - stride_height, 1, 1, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. 
-template -void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, - stride_height, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. 
-template -void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, input_offset, filter_data, - filter_dims, filter_offset, bias_data, bias_dims, stride, - stride, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h index b6151c40b3..4218be20a4 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/legacy_optimized_ops.h @@ -19,6 +19,8 @@ limitations under the License. 
#include #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" #include "tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" @@ -28,9 +30,857 @@ namespace optimized_ops { // Unoptimized reference ops: using reference_ops::ArgMax; +using reference_ops::ArgMinMax; +using reference_ops::Broadcast4DSlowGreater; +using reference_ops::Broadcast4DSlowGreaterEqual; +using reference_ops::Broadcast4DSlowGreaterEqualWithScaling; +using reference_ops::Broadcast4DSlowGreaterWithScaling; +using reference_ops::Broadcast4DSlowLess; +using reference_ops::Broadcast4DSlowLessEqual; +using reference_ops::Broadcast4DSlowLessEqualWithScaling; +using reference_ops::Broadcast4DSlowLessWithScaling; +using reference_ops::BroadcastAdd4DSlow; +using reference_ops::BroadcastGreater; +using reference_ops::BroadcastGreaterEqual; +using reference_ops::BroadcastLess; +using reference_ops::BroadcastLessEqual; +using reference_ops::BroadcastMul4DSlow; +using reference_ops::BroadcastSub4DSlow; +using reference_ops::Concatenation; +using reference_ops::ConcatenationWithScaling; +using reference_ops::DepthConcatenation; +using reference_ops::Dequantize; +using reference_ops::Div; +using reference_ops::FakeQuant; +using reference_ops::Gather; +using reference_ops::Greater; +using reference_ops::GreaterEqual; +using reference_ops::GreaterEqualWithScaling; +using reference_ops::GreaterWithScaling; +using reference_ops::Less; +using reference_ops::LessEqual; +using reference_ops::LessEqualWithScaling; +using reference_ops::LessWithScaling; +using reference_ops::Mean; +using reference_ops::RankOneSelect; using reference_ops::Relu1; using reference_ops::Relu6; +using 
reference_ops::ReluX; +using reference_ops::Select; using reference_ops::SpaceToBatchND; +using reference_ops::Split; +using reference_ops::StridedSlice; +using reference_ops::TensorFlowSplit; +using reference_ops::Transpose; + +static constexpr int kDepthwiseReverseShift = -1; + +template +VectorMap MapAsVector(Scalar* data, const Dims& dims) { + const int size = FlatSize(dims); + return VectorMap(data, size, 1); +} + +template +MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +MatrixMap MapAsMatrixWithLastDimAsCols(Scalar* data, + const Dims& dims) { + const int cols = dims.sizes[N - 1]; + int rows = 1; + for (int d = 0; d < N - 1; d++) { + rows *= dims.sizes[d]; + } + return MatrixMap(data, rows, cols); +} + +template +ArrayMap MapAsArrayWithFirstDimAsRows(Scalar* data, + const Dims& dims) { + const int rows = dims.sizes[0]; + int cols = 1; + for (int d = 1; d < N; d++) { + cols *= dims.sizes[d]; + } + return ArrayMap(data, rows, cols); +} + +// TODO(b/62193649): this function is only needed as long +// as we have the --variable_batch hack. 
+template +MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data, + const Dims& dims, + int rows) { + const int flatsize = FlatSize(dims); + TFLITE_DCHECK((flatsize % rows) == 0); + const int cols = flatsize / rows; + return MatrixMap(data, rows, cols); +} + +inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) { + for (int i = 0; i < 4; i++) { + if (dims1.sizes[i] != dims2.sizes[i]) { + return false; + } + } + return true; +} + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + 
float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, 1, 1, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, pad_width, pad_height, + depth_multiplier, output_activation_min, output_activation_max, + output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + float* output_data, const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, + depth_multiplier, output_data, output_dims); +} + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int 
depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.output_shift = kDepthwiseReverseShift * output_shift; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, 1, 1, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. 
+template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. 
+template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, + filter_dims, filter_offset, bias_data, bias_dims, stride, + stride, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims, + float output_activation_min, + float output_activation_max) { + AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max, + DimsToShape(bias_dims), bias_data, + DimsToShape(array_dims), array_data); +} + +// legacy, for compatibility with old checked-in code +template +void AddBiasAndEvalActivationFunction(const float* bias_data, + const Dims<4>& bias_dims, + float* array_data, + const Dims<4>& array_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + AddBiasAndEvalActivationFunction(bias_data, bias_dims, array_data, array_dims, + output_activation_min, + output_activation_max); +} + +inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::FullyConnectedParams op_params; + 
op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), weights_data, + DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, const Dims<4>& weights_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, + bias_dims, output_activation_min, output_activation_max, + output_data, output_dims); +} + +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data, + gemm_context); +} + +inline void FullyConnected( + const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, + const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset, + int32 output_multiplier, int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data_int32, DimsToShape(output_dims), output_data, + gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims, gemm_context); +} + +inline void ShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), shuffled_weights_data, + DimsToShape(bias_dims), bias_data, + DimsToShape(output_dims), output_data, + shuffled_input_workspace_data, gemm_context); +} + +template +inline void ExtractPatchIntoBufferColumn( + const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, + int stride_width, int stride_height, int pad_width, int pad_height, + int in_width, int in_height, int in_depth, int single_buffer_length, + int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) { + ExtractPatchIntoBufferColumn( + DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width, + stride_height, pad_width, pad_height, in_width, in_height, in_depth, + single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte); +} + +template +void DilatedIm2col(const T* input_data, const Dims<4>& input_dims, + const Dims<4>& filter_dims, int stride_width, + int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + const Dims<4>& output_dims, uint8 zero_byte, + T* im2col_data) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + + DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), DimsToShape(output_dims), + im2col_data); +} + +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, + int stride_height, int pad_width, int pad_height, int kheight, + int kwidth, uint8 zero_byte, T* output_data, + const Dims<4>& output_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = 1; + op_params.dilation_height_factor = 1; + + Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims), + input_data, DimsToShape(output_dims), output_data); +} + +// legacy, for compatibility with old checked-in code +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int kheight, int kwidth, + uint8 zero_byte, T* output_data, const Dims<4>& output_dims) { + Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, + kwidth, zero_byte, output_data, output_dims); +} + +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + float output_activation_min, float 
output_activation_max, + float* output_data, const Dims<4>& output_dims, + float* im2col_data, const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims, + const int8_t* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* scaling_factors_ptr, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims, + int8_t* im2col_data, const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims), + input_data, DimsToShape(filter_dims), filter_data, + DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + float* output_data, const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride_width, stride_height, dilation_width_factor, + dilation_height_factor, pad_width, pad_height, output_activation_min, + output_activation_max, output_data, output_dims, im2col_data, + im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, 
filter_dims, bias_data, bias_dims, + stride_width, stride_height, 1, 1, pad_width, pad_height, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data, + output_dims, im2col_data, im2col_dims); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, + uint8* im2col_data, const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data, gemm_context); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +inline void 
Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == 
FusedActivationFunctionType::kRelu1, + ""); + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, stride, pad_width, + pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, + int pad_width, int pad_height, int kheight, int kwidth, + uint8 zero_byte, T* output_data, const Dims<4>& output_dims) { + Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, + kwidth, zero_byte, output_data, output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm"); + + const auto input_matrix_map = + MapAsMatrixWithFirstDimAsRows(input_data, input_dims); + const auto filter_matrix_map = + MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); + auto output_matrix_map = + MapAsMatrixWithFirstDimAsRows(output_data, output_dims); + + Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); + + AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, + output_dims); +} + +// legacy, for compatibility with old checked-in code +template +void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& 
output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit"); + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + const int input_rows = input_dims.sizes[0]; + const int input_cols = FlatSizeSkipDim(input_dims, 0); + const int filter_rows = filter_dims.sizes[3]; + const int filter_cols = FlatSizeSkipDim(filter_dims, 3); + const int output_rows = output_dims.sizes[0]; + const int output_cols = FlatSizeSkipDim(output_dims, 0); + TFLITE_DCHECK_EQ(output_rows, filter_rows); + TFLITE_DCHECK_EQ(output_cols, input_cols); + TFLITE_DCHECK_EQ(filter_cols, input_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows); + TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1); + TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1); + gemmlowp::MatrixMap filter_matrix( + filter_data, output_rows, filter_cols, filter_cols); + gemmlowp::MatrixMap input_matrix( + input_data, filter_cols, output_cols, filter_cols); + gemmlowp::MatrixMap output_matrix( + output_data, output_rows, output_cols, output_rows); + const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp( + bias_data, output_rows, output_offset, output_multiplier, -output_shift, + output_activation_min, output_activation_max); + gemmlowp::GemmWithOutputPipeline( + gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, + input_offset, output_pipeline); +} + +inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + + TransposeConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +template +void TransposeIm2col(const T* input_data, const Dims<4>& input_dims, + const Dims<4>& filter_dims, int stride_width, + int stride_height, int pad_width, int pad_height, + const Dims<4>& output_dims, uint8 zero_byte, + T* im2col_data) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + + TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), DimsToShape(output_dims), + im2col_data); +} + +inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + tflite::LstmCellParams op_params; + // Float LSTM cell does not need parameters to be set: leave untouched. 
+ + LstmCell(op_params, DimsToShape(input_dims), input_data, + DimsToShape(prev_activ_dims), prev_activ_data, + DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims), + bias_data, DimsToShape(prev_state_dims), prev_state_data, + DimsToShape(output_state_dims), output_state_data, + DimsToShape(output_activ_dims), output_activ_data, + DimsToShape(concat_temp_dims), concat_temp_data, + DimsToShape(activ_temp_dims), activ_temp_data); +} + +template +void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, + const uint8* prev_activ_data_uint8, + const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, + const Dims<4>& weights_dims, const int32* bias_data_int32, + const Dims<4>& bias_dims, const int16* prev_state_data_int16, + const Dims<4>& prev_state_dims, int16* output_state_data_int16, + const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, + const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, + const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, + const Dims<4>& activ_temp_dims, int32 weights_zero_point, + int32 accum_multiplier, int accum_shift, + gemmlowp::GemmContext* gemm_context) { + tflite::LstmCellParams op_params; + op_params.weights_zero_point = weights_zero_point; + op_params.accum_multiplier = accum_multiplier; + op_params.accum_shift = accum_shift; + + LstmCell( + op_params, DimsToShape(input_dims), input_data_uint8, + DimsToShape(prev_activ_dims), prev_activ_data_uint8, + DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims), + bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16, + DimsToShape(output_state_dims), output_state_data_int16, + DimsToShape(output_activ_dims), output_activ_data_uint8, + DimsToShape(concat_temp_dims), concat_temp_data_uint8, + DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context); +} + +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, 
+ T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + + BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data); +} template void L2Normalization(const float* input_data, const RuntimeShape& input_shape, @@ -574,6 +1424,14 @@ void L2Pool(const float* input_data, const Dims<4>& input_dims, int stride, filter_width, filter_height, output_data, output_dims); } +inline void Softmax(const float* input_data, const RuntimeShape& input_shape, + float beta, float* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.beta = beta; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + inline void Softmax(const float* input_data, const Dims<4>& input_dims, float beta, float* output_data, const Dims<4>& output_dims) { @@ -581,6 +1439,16 @@ inline void Softmax(const float* input_data, const Dims<4>& input_dims, DimsToShape(output_dims)); } +inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_beta_multiplier; + params.input_left_shift = input_beta_left_shift; + params.diff_min = diff_min; + Softmax(params, input_shape, input_data, output_shape, output_data); +} inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_beta_multiplier, int32 input_beta_left_shift, int diff_min, uint8* output_data, @@ -590,12 +1458,33 @@ inline void Softmax(const uint8* input_data, const Dims<4>& input_dims, DimsToShape(output_dims)); } +inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, + float* output_data, const 
RuntimeShape& output_shape) { + SoftmaxParams params; + // No params currently used for float LogSoftmax. + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + inline void LogSoftmax(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { LogSoftmax(input_data, DimsToShape(input_dims), output_data, DimsToShape(output_dims)); } +inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + params.reverse_scaling_divisor = reverse_scaling_divisor; + params.reverse_scaling_right_shift = reverse_scaling_right_shift; + params.diff_min = diff_min; + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, int32 input_multiplier, int32 input_left_shift, int32 reverse_scaling_divisor, @@ -607,6 +1496,18 @@ inline void LogSoftmax(const uint8* input_data, const Dims<4>& input_dims, DimsToShape(output_dims)); } +inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + LogisticParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Logistic(params, input_shape, input_data, output_shape, output_data); +} + inline void Logistic(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { Logistic(DimsToShape(input_dims), 
input_data, DimsToShape(output_dims), @@ -622,6 +1523,20 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, DimsToShape(output_dims)); } +inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, + const RuntimeShape& output_shape, int16* output_data) { + LogisticParams params; + // No params currently needed by int16 Logistic. + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, + int16* output_data, const RuntimeShape& output_shape) { + LogisticParams params; + // No params currently needed by int16 Logistic. + Logistic(params, input_shape, input_data, output_shape, output_data); +} + inline void Logistic(const int16* input_data, const Dims<4>& input_dims, int16* output_data, const Dims<4>& output_dims) { Logistic(input_data, DimsToShape(input_dims), output_data, @@ -634,6 +1549,18 @@ inline void Tanh(const float* input_data, const Dims<4>& input_dims, output_data); } +inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + TanhParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, int32 input_zero_point, int32 input_range_radius, int32 input_multiplier, int input_left_shift, @@ -643,6 +1570,14 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, DimsToShape(output_dims)); } +inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, + int input_left_shift, int16* output_data, + const RuntimeShape& output_shape) { + 
TanhParams params; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + inline void Tanh(const int16* input_data, const Dims<4>& input_dims, int input_left_shift, int16* output_data, const Dims<4>& output_dims) { @@ -777,7 +1712,6 @@ inline void BroadcastMul(const float* input1_data, const Dims<4>& input1_dims, DimsToShape(output_dims), output_data); } -// Legacy Dims<4>. inline void LocalResponseNormalization(const float* input_data, const Dims<4>& input_dims, int range, float bias, float alpha, float beta, @@ -793,7 +1727,6 @@ inline void LocalResponseNormalization(const float* input_data, DimsToShape(output_dims), output_data); } -// Legacy Dims<4> version. template void Cast(const SrcT* input_data, const Dims<4>& input_dims, DstT* output_data, const Dims<4>& output_dims) { @@ -801,14 +1734,12 @@ void Cast(const SrcT* input_data, const Dims<4>& input_dims, DstT* output_data, output_data); } -// Legacy Dims<4> version. inline void Floor(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { Floor(DimsToShape(input_dims), input_data, DimsToShape(output_dims), output_data); } -// Legacy Dims<4> inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, const int32* output_size_data, const Dims<4>& output_size_dims, float* output_data, @@ -820,7 +1751,6 @@ inline void ResizeBilinear(const float* input_data, const Dims<4>& input_dims, DimsToShape(output_dims), output_data); } -// Legacy Dims<4> inline void ResizeBilinear(const uint8* input_data, const Dims<4>& input_dims, const int32* output_size_data, const Dims<4>& output_size_dims, uint8* output_data, @@ -850,7 +1780,6 @@ inline void ResizeBilinear(const uint8* input_data, const Dims<4>& input_dims, output_data, output_dims, /*align_corners=*/false); } -// Legacy Dims<4>. 
template inline void BatchToSpaceND(const T* input_data, const Dims<4>& input_dims, const int32* block_shape_data, diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 732880d9da..77f84e0c1c 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -52,10 +52,6 @@ using reference_ops::Broadcast4DSlowLessEqual; using reference_ops::Broadcast4DSlowLessEqualWithScaling; using reference_ops::Broadcast4DSlowLessWithScaling; using reference_ops::BroadcastAdd4DSlow; -using reference_ops::BroadcastGreater; -using reference_ops::BroadcastGreaterEqual; -using reference_ops::BroadcastLess; -using reference_ops::BroadcastLessEqual; using reference_ops::BroadcastMul4DSlow; using reference_ops::BroadcastSub4DSlow; using reference_ops::Concatenation; @@ -82,7 +78,6 @@ using reference_ops::Select; using reference_ops::SpaceToBatchND; using reference_ops::Split; using reference_ops::StridedSlice; -using reference_ops::TensorFlowSplit; using reference_ops::Transpose; // TODO(b/80247582) Remove this constant. @@ -112,12 +107,6 @@ VectorMap MapAsVector(Scalar* data, const RuntimeShape& shape) { return VectorMap(data, size, 1); } -template -VectorMap MapAsVector(Scalar* data, const Dims& dims) { - const int size = FlatSize(dims); - return VectorMap(data, size, 1); -} - // Make a local VectorMap typedef allowing to map a float array // as a Eigen matrix expression. The same explanation as for VectorMap // above also applies here. 
@@ -145,28 +134,6 @@ MatrixMap MapAsMatrixWithFirstDimAsCols(Scalar* data, return MatrixMap(data, rows, cols); } -template -MatrixMap MapAsMatrixWithFirstDimAsRows(Scalar* data, - const Dims& dims) { - const int rows = dims.sizes[0]; - int cols = 1; - for (int d = 1; d < N; d++) { - cols *= dims.sizes[d]; - } - return MatrixMap(data, rows, cols); -} - -template -MatrixMap MapAsMatrixWithLastDimAsCols(Scalar* data, - const Dims& dims) { - const int cols = dims.sizes[N - 1]; - int rows = 1; - for (int d = 0; d < N - 1; d++) { - rows *= dims.sizes[d]; - } - return MatrixMap(data, rows, cols); -} - template using ArrayMap = typename std::conditional< std::is_const::value, @@ -174,17 +141,6 @@ using ArrayMap = typename std::conditional< Eigen::Dynamic, Eigen::Dynamic>>, Eigen::Map>>::type; -template -ArrayMap MapAsArrayWithFirstDimAsRows(Scalar* data, - const Dims& dims) { - const int rows = dims.sizes[0]; - int cols = 1; - for (int d = 1; d < N; d++) { - cols *= dims.sizes[d]; - } - return ArrayMap(data, rows, cols); -} - template ArrayMap MapAsArrayWithLastDimAsRows(Scalar* data, const RuntimeShape& shape) { @@ -206,20 +162,6 @@ struct TTypes { UnalignedConstMatrix; }; -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -// TODO(b/62193649): this function is only needed as long -// as we have the --variable_batch hack. -template -MatrixMap MapAsMatrixWithGivenNumberOfRows(Scalar* data, - const Dims& dims, - int rows) { - const int flatsize = FlatSize(dims); - TFLITE_DCHECK((flatsize % rows) == 0); - const int cols = flatsize / rows; - return MatrixMap(data, rows, cols); -} - // TODO(b/62193649): this function is only needed as long // as we have the --variable_batch hack. 
template @@ -271,15 +213,6 @@ SaturatingRoundingMultiplyByPOTParam( SaturatingRoundingMultiplyByPOTParam(a.raw(), exponent)); } -inline bool AreSameDims(const Dims<4>& dims1, const Dims<4>& dims2) { - for (int i = 0; i < 4; i++) { - if (dims1.sizes[i] != dims2.sizes[i]) { - return false; - } - } - return true; -} - inline void AddBiasAndEvalActivationFunction(float output_activation_min, float output_activation_max, const RuntimeShape& bias_shape, @@ -353,33 +286,6 @@ inline void AddBiasAndEvalActivationFunction(float output_activation_min, #endif } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void AddBiasAndEvalActivationFunction(const float* bias_data, - const Dims<4>& bias_dims, - float* array_data, - const Dims<4>& array_dims, - float output_activation_min, - float output_activation_max) { - AddBiasAndEvalActivationFunction(output_activation_min, output_activation_max, - DimsToShape(bias_dims), bias_data, - DimsToShape(array_dims), array_data); -} - -// Note: This to be converted to RuntimeShapes along with Conv. -// legacy, for compatibility with old checked-in code -template -void AddBiasAndEvalActivationFunction(const float* bias_data, - const Dims<4>& bias_dims, - float* array_data, - const Dims<4>& array_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - AddBiasAndEvalActivationFunction(bias_data, bias_dims, array_data, array_dims, - output_activation_min, - output_activation_max); -} - template void Gemm(const Eigen::MatrixBase& lhs, const Eigen::MatrixBase& rhs, Eigen::MatrixBase* result) { @@ -926,38 +832,6 @@ inline void FullyConnected( output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, - const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - tflite::FullyConnectedParams op_params; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(weights_dims), weights_data, - DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data); -} - -// legacy, for compatibility with old checked-in code -template -void FullyConnected(const float* input_data, const Dims<4>& input_dims, - const float* weights_data, const Dims<4>& weights_dims, - const float* bias_data, const Dims<4>& bias_dims, - float* output_data, const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, - bias_dims, output_activation_min, output_activation_max, - output_data, output_dims); -} - #ifdef USE_NEON inline void FullyConnectedAsGEMV( const RuntimeShape& input_shape, const uint8* input_data, @@ -1204,33 +1078,6 @@ inline void FullyConnected( input_offset, output_pipeline); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data, - gemm_context); -} - inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -1318,54 +1165,6 @@ inline void FullyConnected( input_offset, output_pipeline); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected( - const uint8* input_data, const Dims<4>& input_dims, int32 input_offset, - const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data_int32, const Dims<4>& bias_dims, int32 output_offset, - int32 output_multiplier, int output_shift, int32 output_activation_min, - int32 output_activation_max, int16* output_data, const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data_int32, DimsToShape(output_dims), output_data, - gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_data, output_dims, gemm_context); -} - // Internal function doing the actual arithmetic work for // ShuffledFullyConnected. // May be called either directly by it (single-threaded case) or may be used @@ -1810,29 +1609,6 @@ inline void ShuffledFullyConnected( gemm_context->workers_pool()->Execute(tasks); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void ShuffledFullyConnected( - const uint8* input_data, const Dims<4>& input_dims, - const uint8* shuffled_weights_data, const Dims<4>& weights_dims, - const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - int16* output_data, const Dims<4>& output_dims, - uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
- op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(weights_dims), shuffled_weights_data, - DimsToShape(bias_dims), bias_data, - DimsToShape(output_dims), output_data, - shuffled_input_workspace_data, gemm_context); -} - template inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w, int h, int b, int kheight, int kwidth, @@ -1923,20 +1699,6 @@ inline void ExtractPatchIntoBufferColumn(const RuntimeShape& input_shape, int w, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -inline void ExtractPatchIntoBufferColumn( - const Dims<4>& input_dims, int w, int h, int b, int kheight, int kwidth, - int stride_width, int stride_height, int pad_width, int pad_height, - int in_width, int in_height, int in_depth, int single_buffer_length, - int buffer_id, const T* in_data, T* conv_buffer_data, uint8 zero_byte) { - ExtractPatchIntoBufferColumn( - DimsToShape(input_dims), w, h, b, kheight, kwidth, stride_width, - stride_height, pad_width, pad_height, in_width, in_height, in_depth, - single_buffer_length, buffer_id, in_data, conv_buffer_data, zero_byte); -} - template void DilatedIm2col(const ConvParams& params, uint8 zero_byte, const RuntimeShape& input_shape, const T* input_data, @@ -2020,30 +1782,6 @@ void DilatedIm2col(const ConvParams& params, uint8 zero_byte, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-template -void DilatedIm2col(const T* input_data, const Dims<4>& input_dims, - const Dims<4>& filter_dims, int stride_width, - int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - const Dims<4>& output_dims, uint8 zero_byte, - T* im2col_data) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - - DilatedIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), DimsToShape(output_dims), - im2col_data); -} - template void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte, const RuntimeShape& input_shape, const T* input_data, @@ -2079,36 +1817,6 @@ void Im2col(const ConvParams& params, int kheight, int kwidth, uint8 zero_byte, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -void Im2col(const T* input_data, const Dims<4>& input_dims, int stride_width, - int stride_height, int pad_width, int pad_height, int kheight, - int kwidth, uint8 zero_byte, T* output_data, - const Dims<4>& output_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = 1; - op_params.dilation_height_factor = 1; - - Im2col(op_params, kheight, kwidth, zero_byte, DimsToShape(input_dims), - input_data, DimsToShape(output_dims), output_data); -} - -// legacy, for compatibility with old checked-in code -template -void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int kheight, int kwidth, - uint8 zero_byte, T* output_data, const Dims<4>& output_dims) { - Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, - kwidth, zero_byte, output_data, output_dims); -} - inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& filter_shape, const float* filter_data, const RuntimeShape& bias_shape, @@ -2172,33 +1880,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - float* im2col_data, const Dims<4>& im2col_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), - filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data); -} - inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr, const RuntimeShape& input_shape, const int8_t* input_data, @@ -2279,82 +1960,6 @@ inline void HybridConv(const ConvParams& params, float* scaling_factors_ptr, output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void HybridConv(const int8_t* input_data, const Dims<4>& input_dims, - const int8_t* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float* scaling_factors_ptr, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - int8_t* im2col_data, const Dims<4>& im2col_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - HybridConv(op_params, scaling_factors_ptr, DimsToShape(input_dims), - input_data, DimsToShape(filter_dims), filter_data, - DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride_width, - int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - float* output_data, const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, - stride_width, stride_height, dilation_width_factor, - dilation_height_factor, pad_width, pad_height, output_activation_min, - output_activation_max, output_data, output_dims, im2col_data, - im2col_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride_width, - int stride_height, int pad_width, int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, - stride_width, stride_height, 1, 1, pad_width, pad_height, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data, - output_dims, im2col_data, im2col_dims); -} - inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, const uint8* filter_data, const RuntimeShape& bias_shape, @@ -2446,192 +2051,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, input_offset, output_pipeline); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, - uint8* im2col_data, const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), - filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - Conv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1, - pad_width, pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - Conv(input_data, input_dims, input_offset, 
filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, stride_height, - pad_width, pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - Conv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride, stride, pad_width, - pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void Im2col(const T* input_data, const Dims<4>& input_dims, int stride, - int pad_width, int pad_height, int kheight, int kwidth, - uint8 zero_byte, T* output_data, const Dims<4>& output_dims) { - Im2col(input_data, input_dims, stride, stride, pad_width, pad_height, kheight, - kwidth, zero_byte, output_data, output_dims); -} - -// legacy, for compatibility with old checked-in code -template -void ConvAsGemm(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - float* output_data, const Dims<4>& output_dims) { - gemmlowp::ScopedProfilingLabel label("ConvAsGemm"); - - const auto input_matrix_map = - MapAsMatrixWithFirstDimAsRows(input_data, input_dims); - const auto filter_matrix_map = - MapAsMatrixWithLastDimAsCols(filter_data, filter_dims); - auto output_matrix_map = - MapAsMatrixWithFirstDimAsRows(output_data, output_dims); - - Gemm(filter_matrix_map.transpose(), input_matrix_map, &output_matrix_map); - - AddBiasAndEvalActivationFunction(bias_data, bias_dims, output_data, - output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, - gemmlowp::GemmContext* gemm_context) { - gemmlowp::ScopedProfilingLabel label("ConvAsGemm/8bit"); - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - const int input_rows = input_dims.sizes[0]; - const int input_cols = FlatSizeSkipDim(input_dims, 0); - const int filter_rows = filter_dims.sizes[3]; - const int filter_cols = FlatSizeSkipDim(filter_dims, 3); - const int output_rows = output_dims.sizes[0]; - const int output_cols = FlatSizeSkipDim(output_dims, 0); - TFLITE_DCHECK_EQ(output_rows, filter_rows); - TFLITE_DCHECK_EQ(output_cols, input_cols); - TFLITE_DCHECK_EQ(filter_cols, input_rows); - TFLITE_DCHECK_EQ(bias_dims.sizes[0], output_rows); - TFLITE_DCHECK_EQ(bias_dims.sizes[1], 1); - TFLITE_DCHECK_EQ(bias_dims.sizes[2], 1); - TFLITE_DCHECK_EQ(bias_dims.sizes[3], 1); - gemmlowp::MatrixMap filter_matrix( - filter_data, output_rows, filter_cols, filter_cols); - gemmlowp::MatrixMap input_matrix( - input_data, filter_cols, output_cols, filter_cols); - gemmlowp::MatrixMap output_matrix( - output_data, output_rows, output_cols, output_rows); - const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp( - bias_data, output_rows, output_offset, output_multiplier, -output_shift, - output_activation_min, output_activation_max); - gemmlowp::GemmWithOutputPipeline( - gemm_context, filter_matrix, input_matrix, &output_matrix, filter_offset, - input_offset, 
output_pipeline); -} - template inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params, const RuntimeShape& unextended_input_shape, @@ -3548,21 +2967,6 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T output_activation_min, T output_activation_max, - T* output_data, const Dims<4>& output_dims) { - tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); - - BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, - DimsToShape(output_dims), output_data); -} - // TODO(aselle): This is not actually optimized yet. inline void SubNonBroadcast(const ArithmeticParams& params, const RuntimeShape& input1_shape, @@ -3756,31 +3160,6 @@ inline void LstmCell( output_state_map.tanh(); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void LstmCell(const float* input_data, const Dims<4>& input_dims, - const float* prev_activ_data, - const Dims<4>& prev_activ_dims, const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, const float* prev_state_data, - const Dims<4>& prev_state_dims, float* output_state_data, - const Dims<4>& output_state_dims, float* output_activ_data, - const Dims<4>& output_activ_dims, float* concat_temp_data, - const Dims<4>& concat_temp_dims, float* activ_temp_data, - const Dims<4>& activ_temp_dims) { - tflite::LstmCellParams op_params; - // Float LSTM cell does not need parameters to be set: leave untouched. 
- - LstmCell(op_params, DimsToShape(input_dims), input_data, - DimsToShape(prev_activ_dims), prev_activ_data, - DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims), - bias_data, DimsToShape(prev_state_dims), prev_state_data, - DimsToShape(output_state_dims), output_state_data, - DimsToShape(output_activ_dims), output_activ_data, - DimsToShape(concat_temp_dims), concat_temp_data, - DimsToShape(activ_temp_dims), activ_temp_data); -} - // Quantized LSTM cell. Currently just a copy of the reference impl in // reference_ops.h. See the big function comment there, not replicating it // here. @@ -4071,37 +3450,6 @@ inline void LstmCell( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, - const uint8* prev_activ_data_uint8, - const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, - const Dims<4>& weights_dims, const int32* bias_data_int32, - const Dims<4>& bias_dims, const int16* prev_state_data_int16, - const Dims<4>& prev_state_dims, int16* output_state_data_int16, - const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, - const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, - const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, - const Dims<4>& activ_temp_dims, int32 weights_zero_point, - int32 accum_multiplier, int accum_shift, - gemmlowp::GemmContext* gemm_context) { - tflite::LstmCellParams op_params; - op_params.weights_zero_point = weights_zero_point; - op_params.accum_multiplier = accum_multiplier; - op_params.accum_shift = accum_shift; - - LstmCell( - op_params, DimsToShape(input_dims), input_data_uint8, - DimsToShape(prev_activ_dims), prev_activ_data_uint8, - DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims), - bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16, - DimsToShape(output_state_dims), output_state_data_int16, - DimsToShape(output_activ_dims), 
output_activ_data_uint8, - DimsToShape(concat_temp_dims), concat_temp_data_uint8, - DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context); -} - inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } @@ -4561,16 +3909,6 @@ inline void Softmax(const SoftmaxParams& params, out_mat.array().rowwise() *= scale; } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, - float beta, float* output_data, - const RuntimeShape& output_shape) { - SoftmaxParams params; - params.beta = beta; - Softmax(params, input_shape, input_data, output_shape, output_data); -} - inline void Softmax(const SoftmaxParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, uint8* output_data) { @@ -4782,19 +4120,6 @@ inline void Softmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_beta_multiplier, int32 input_beta_left_shift, - int diff_min, uint8* output_data, - const RuntimeShape& output_shape) { - SoftmaxParams params; - params.input_multiplier = input_beta_multiplier; - params.input_left_shift = input_beta_left_shift; - params.diff_min = diff_min; - Softmax(params, input_shape, input_data, output_shape, output_data); -} - // TODO(myenik): This is the same as the reference implementation, not actually // optimized yet. inline void LogSoftmax(const SoftmaxParams& params, @@ -4832,15 +4157,6 @@ inline void LogSoftmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// Legacy -inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - SoftmaxParams params; - // No params currently used for float LogSoftmax. - LogSoftmax(params, input_shape, input_data, output_shape, output_data); -} - template inline gemmlowp::FixedPoint log_x_for_x_greater_than_or_equal_to_1_impl( @@ -5045,22 +4361,6 @@ inline void LogSoftmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { - SoftmaxParams params; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - params.reverse_scaling_divisor = reverse_scaling_divisor; - params.reverse_scaling_right_shift = reverse_scaling_right_shift; - params.diff_min = diff_min; - LogSoftmax(params, input_shape, input_data, output_shape, output_data); -} - inline void Logistic(const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("Logistic"); @@ -5219,20 +4519,6 @@ inline void Logistic(const LogisticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { - LogisticParams params; - params.input_zero_point = input_zero_point; - params.input_range_radius = input_range_radius; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - Logistic(params, input_shape, input_data, output_shape, output_data); -} - inline void Logistic(const LogisticParams& params, const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { @@ -5294,24 +4580,6 @@ inline void Logistic(const LogisticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy version. -inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, - const RuntimeShape& output_shape, int16* output_data) { - LogisticParams params; - // No params currently needed by int16 Logistic. - Logistic(params, input_shape, input_data, output_shape, output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy version. -inline void Logistic(const int16* input_data, const RuntimeShape& input_shape, - int16* output_data, const RuntimeShape& output_shape) { - LogisticParams params; - // No params currently needed by int16 Logistic. - Logistic(params, input_shape, input_data, output_shape, output_data); -} - inline void Tanh(const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { gemmlowp::ScopedProfilingLabel label("Tanh"); @@ -5479,20 +4747,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { - TanhParams params; - params.input_zero_point = input_zero_point; - params.input_range_radius = input_range_radius; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - Tanh(params, input_shape, input_data, output_shape, output_data); -} - inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { @@ -5594,16 +4848,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, - int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { - TanhParams params; - params.input_left_shift = input_left_shift; - Tanh(params, input_shape, input_data, output_shape, output_data); -} - template inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data, const RuntimeShape& output_shape, DstT* output_data) { @@ -6486,27 +5730,6 @@ void TransposeIm2col(const ConvParams& params, uint8 zero_byte, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -void TransposeIm2col(const T* input_data, const Dims<4>& input_dims, - const Dims<4>& filter_dims, int stride_width, - int stride_height, int pad_width, int pad_height, - const Dims<4>& output_dims, uint8 zero_byte, - T* im2col_data) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - - TransposeIm2col(op_params, zero_byte, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), DimsToShape(output_dims), - im2col_data); -} - inline void TransposeConv( const ConvParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& filter_shape, @@ -6530,27 +5753,6 @@ inline void TransposeConv( Gemm(filter_matrix_map.transpose(), im2col_matrix_map, &output_matrix_map); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - - TransposeConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data); -} - } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h index a8428528c9..11224270a4 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h @@ -94,81 +94,6 @@ inline void DepthwiseConv( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - tflite::DepthwiseParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.depth_multiplier = depth_multiplier; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - DepthwiseConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride_width, stride_height, 1, 1, pad_width, - pad_height, depth_multiplier, output_activation_min, - output_activation_max, output_data, output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. 
-template -void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, float* output_data, - const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride_width, stride_height, pad_width, pad_height, - depth_multiplier, output_activation_min, output_activation_max, - output_data, output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. -template -void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int depth_multiplier, - float* output_data, const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride, stride, pad_width, pad_height, - depth_multiplier, output_data, output_dims); -} - } // end namespace reference_ops } // end namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h index e8fc566502..eab28e6c84 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h @@ -25,9 +25,6 @@ limitations under the License. namespace tflite { namespace reference_ops { -// TODO(b/80418076): Move to legacy ops file, along with invocations. 
-static constexpr int kDepthwiseReverseShift = -1; - inline void DepthwiseConv( const DepthwiseParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -109,106 +106,6 @@ inline void DepthwiseConv( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, - int dilation_width_factor, int dilation_height_factor, - int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - tflite::DepthwiseParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.depth_multiplier = depth_multiplier; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
- op_params.output_shift = kDepthwiseReverseShift * output_shift; - - DepthwiseConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, - stride_height, 1, 1, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. 
-template -void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int depth_multiplier, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims) { - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, - stride_height, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy, for compatibility with old checked-in code. 
-template -void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int depth_multiplier, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims) { - DepthwiseConv(input_data, input_dims, input_offset, filter_data, - filter_dims, filter_offset, bias_data, bias_dims, stride, - stride, pad_width, pad_height, depth_multiplier, - output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims); -} - } // end namespace reference_ops } // end namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h index 23325e8c4c..3c7fd29256 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/fully_connected.h @@ -62,39 +62,6 @@ inline void FullyConnected( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, - const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, - float output_activation_min, - float output_activation_max, float* output_data, - const Dims<4>& output_dims) { - tflite::FullyConnectedParams op_params; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(weights_dims), weights_data, - DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void FullyConnected(const float* input_data, const Dims<4>& input_dims, - const float* weights_data, const Dims<4>& weights_dims, - const float* bias_data, const Dims<4>& bias_dims, - float* output_data, const Dims<4>& output_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, - bias_dims, output_activation_min, output_activation_max, - output_data, output_dims); -} - inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -144,32 +111,6 @@ inline void FullyConnected( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, void* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data, - gemm_context); -} - inline void FullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, @@ -224,32 +165,6 @@ inline void FullyConnected( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, int16* output_data, - const Dims<4>& output_dims, void* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - FullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), - bias_data, DimsToShape(output_dims), output_data, - gemm_context); -} - inline void ShuffledFullyConnected( const FullyConnectedParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& weights_shape, @@ -405,55 +320,6 @@ inline void ShuffledFullyConnected( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void ShuffledFullyConnected( - const uint8* input_data, const Dims<4>& input_dims, - const uint8* shuffled_weights_data, const Dims<4>& weights_dims, - const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, - int output_shift, int32 output_activation_min, int32 output_activation_max, - int16* output_data, const Dims<4>& output_dims, - uint8* shuffled_input_workspace_data, void* gemm_context) { - tflite::FullyConnectedParams op_params; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. 
Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data, - DimsToShape(weights_dims), shuffled_weights_data, - DimsToShape(bias_dims), bias_data, - DimsToShape(output_dims), output_data, - shuffled_input_workspace_data, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, void* gemm_context) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, output_offset, - output_multiplier, output_shift, output_activation_min, - output_activation_max, output_data, output_dims, gemm_context); -} - } // namespace reference_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h index 683ccdc74d..be99240b1f 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h +++ 
b/tensorflow/contrib/lite/kernels/internal/reference/legacy_reference_ops.h @@ -19,6 +19,8 @@ limitations under the License. #include #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_float.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/depthwiseconv_uint8.h" #include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" @@ -26,6 +28,1070 @@ namespace tflite { namespace reference_ops { +static constexpr int kDepthwiseReverseShift = -1; + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +inline void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, 1, 1, pad_width, + pad_height, depth_multiplier, output_activation_min, + output_activation_max, output_data, output_dims); +} + +// Legacy, for compatibility with old checked-in code. 
+template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, float* output_data, + const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride_width, stride_height, pad_width, pad_height, + depth_multiplier, output_activation_min, output_activation_max, + output_data, output_dims); +} + +// Legacy, for compatibility with old checked-in code. +template +void DepthwiseConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + float* output_data, const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, pad_width, pad_height, + depth_multiplier, output_data, output_dims); +} + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, + int dilation_width_factor, int dilation_height_factor, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + tflite::DepthwiseParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.depth_multiplier = depth_multiplier; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = kDepthwiseReverseShift * output_shift; + + DepthwiseConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data); +} + +inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, 1, 1, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. 
+template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int depth_multiplier, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims) { + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + DepthwiseConv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, + stride_height, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +// Legacy, for compatibility with old checked-in code. 
+template +void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int depth_multiplier, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + DepthwiseConv(input_data, input_dims, input_offset, filter_data, + filter_dims, filter_offset, bias_data, bias_dims, stride, + stride, pad_width, pad_height, depth_multiplier, + output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims); +} + +inline void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + float output_activation_min, float output_activation_max, + float* output_data, const Dims<4>& output_dims, + float* im2col_data, const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + float* output_data, const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride_width, stride_height, dilation_width_factor, + dilation_height_factor, pad_width, pad_height, output_activation_min, + output_activation_max, output_data, output_dims, im2col_data, + im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride_width, + int stride_height, int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, 
&output_activation_min, &output_activation_max); + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, + stride_width, stride_height, 1, 1, pad_width, pad_height, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + const float* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + Conv(input_data, input_dims, filter_data, filter_dims, bias_data, + bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data, + output_dims, im2col_data, im2col_dims); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int dilation_width_factor, + int dilation_height_factor, int pad_width, int pad_height, + int32 output_offset, int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, + uint8* im2col_data, const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. 
+ op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + op_params.dilation_width_factor = dilation_width_factor; + op_params.dilation_height_factor = dilation_height_factor; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), + filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data, gemm_context); +} + +inline void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +inline void 
Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, + pad_width, pad_height, output_offset, output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, output_dims, + im2col_data, im2col_dims, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void Conv(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, int stride, + int pad_width, int pad_height, int32 output_offset, + int32 output_multiplier, int output_shift, + int32 output_activation_min, int32 output_activation_max, + uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, + const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { + Conv(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride, stride, pad_width, + pad_height, output_offset, 
output_multiplier, output_shift, + output_activation_min, output_activation_max, output_data, + output_dims, im2col_data, im2col_dims, gemm_context); +} + +inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* output_data, + const Dims<4>& output_dims, float* im2col_data, + const Dims<4>& im2col_dims) { + tflite::ConvParams op_params; + // Padding type is ignored, but still set. + op_params.padding_type = PaddingType::kSame; + op_params.padding_values.width = pad_width; + op_params.padding_values.height = pad_height; + op_params.stride_width = stride_width; + op_params.stride_height = stride_height; + + TransposeConv(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(output_dims), + output_data, DimsToShape(im2col_dims), im2col_data); +} + +inline void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, + float output_activation_min, + float output_activation_max, float* output_data, + const Dims<4>& output_dims) { + tflite::FullyConnectedParams op_params; + op_params.float_activation_min = output_activation_min; + op_params.float_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), weights_data, + DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), + output_data); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const float* input_data, const Dims<4>& input_dims, + const float* weights_data, const Dims<4>& weights_dims, + const float* bias_data, const Dims<4>& bias_dims, + float* output_data, const Dims<4>& output_dims) { + float output_activation_min, output_activation_max; + GetActivationMinMax(Ac, 
&output_activation_min, &output_activation_max); + FullyConnected(input_data, input_dims, weights_data, weights_dims, bias_data, + bias_dims, output_activation_min, output_activation_max, + output_data, output_dims); +} + +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data, + gemm_context); +} + +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.input_offset = input_offset; + op_params.weights_offset = filter_offset; + op_params.output_offset = output_offset; + 
op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + FullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(filter_dims), filter_data, DimsToShape(bias_dims), + bias_data, DimsToShape(output_dims), output_data, + gemm_context); +} + +inline void ShuffledFullyConnected( + const uint8* input_data, const Dims<4>& input_dims, + const uint8* shuffled_weights_data, const Dims<4>& weights_dims, + const int32* bias_data, const Dims<4>& bias_dims, int32 output_multiplier, + int output_shift, int32 output_activation_min, int32 output_activation_max, + int16* output_data, const Dims<4>& output_dims, + uint8* shuffled_input_workspace_data, gemmlowp::GemmContext* gemm_context) { + tflite::FullyConnectedParams op_params; + op_params.output_multiplier = output_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.output_shift = kReverseShift * output_shift; + op_params.quantized_activation_min = output_activation_min; + op_params.quantized_activation_max = output_activation_max; + + ShuffledFullyConnected(op_params, DimsToShape(input_dims), input_data, + DimsToShape(weights_dims), shuffled_weights_data, + DimsToShape(bias_dims), bias_data, + DimsToShape(output_dims), output_data, + shuffled_input_workspace_data, gemm_context); +} + +// legacy, for compatibility with old checked-in code +template +void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + static_assert(Ac == FusedActivationFunctionType::kNone || + Ac == FusedActivationFunctionType::kRelu || + Ac == FusedActivationFunctionType::kRelu6 || + Ac == FusedActivationFunctionType::kRelu1, + ""); + if (Ac == FusedActivationFunctionType::kNone) { + TFLITE_DCHECK_EQ(output_activation_min, 0); + TFLITE_DCHECK_EQ(output_activation_max, 255); + } + FullyConnected(input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims, gemm_context); +} + +inline void LstmCell(const float* input_data, const Dims<4>& input_dims, + const float* prev_activ_data, + const Dims<4>& prev_activ_dims, const float* weights_data, + const Dims<4>& weights_dims, const float* bias_data, + const Dims<4>& bias_dims, const float* prev_state_data, + const Dims<4>& prev_state_dims, float* output_state_data, + const Dims<4>& output_state_dims, float* output_activ_data, + const Dims<4>& 
output_activ_dims, float* concat_temp_data, + const Dims<4>& concat_temp_dims, float* activ_temp_data, + const Dims<4>& activ_temp_dims) { + tflite::LstmCellParams op_params; + // Float LSTM cell does not need parameters to be set: leave untouched. + + LstmCell(op_params, DimsToShape(input_dims), input_data, + DimsToShape(prev_activ_dims), prev_activ_data, + DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims), + bias_data, DimsToShape(prev_state_dims), prev_state_data, + DimsToShape(output_state_dims), output_state_data, + DimsToShape(output_activ_dims), output_activ_data, + DimsToShape(concat_temp_dims), concat_temp_data, + DimsToShape(activ_temp_dims), activ_temp_data); +} + +template +void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, + const uint8* prev_activ_data_uint8, + const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, + const Dims<4>& weights_dims, const int32* bias_data_int32, + const Dims<4>& bias_dims, const int16* prev_state_data_int16, + const Dims<4>& prev_state_dims, int16* output_state_data_int16, + const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, + const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, + const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, + const Dims<4>& activ_temp_dims, int32 weights_zero_point, + int32 accum_multiplier, int accum_shift, + gemmlowp::GemmContext* gemm_context) { + tflite::LstmCellParams op_params; + op_params.weights_zero_point = weights_zero_point; + op_params.accum_multiplier = accum_multiplier; + op_params.accum_shift = accum_shift; + + LstmCell( + op_params, DimsToShape(input_dims), input_data_uint8, + DimsToShape(prev_activ_dims), prev_activ_data_uint8, + DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims), + bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16, + DimsToShape(output_state_dims), output_state_data_int16, + DimsToShape(output_activ_dims), output_activ_data_uint8, + 
DimsToShape(concat_temp_dims), concat_temp_data_uint8, + DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context); +} + +template +void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + + BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data); +} + +template +inline void Div(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + T output_activation_min, T output_activation_max, + T* output_data, const Dims<4>& output_dims) { + tflite::ArithmeticParams op_params; + SetActivationParams(output_activation_min, output_activation_max, &op_params); + + Div(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, DimsToShape(output_dims), + output_data); +} + +template +inline void Concatenation(int concat_dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + // For now we don't have a model with a Concatenation with fused activation. 
+ TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); + + std::vector input_shapes(inputs_count); + std::vector input_shapes_indirect(inputs_count); + for (int i = 0; i < inputs_count; ++i) { + ShapeFromDims(*input_dims[i], &input_shapes[i]); + input_shapes_indirect[i] = &input_shapes[i]; + } + tflite::ConcatenationParams op_params; + op_params.axis = 3 - concat_dim; + op_params.inputs_count = inputs_count; + + Concatenation(op_params, input_shapes_indirect.data(), input_data, + DimsToShape(output_dims), output_data); +} + +inline void Concatenation(int concat_dim, const uint8* const* input_data, + const Dims<4>* const* input_dims, + const int32* input_zeropoint, + const float* input_scale, int inputs_count, + uint8* output_data, const Dims<4>& output_dims, + const int32 output_zeropoint, + const float output_scale) { + std::vector input_shapes(inputs_count); + std::vector input_shapes_indirect(inputs_count); + for (int i = 0; i < inputs_count; ++i) { + ShapeFromDims(*input_dims[i], &input_shapes[i]); + input_shapes_indirect[i] = &input_shapes[i]; + } + tflite::ConcatenationParams op_params; + op_params.axis = 3 - concat_dim; + op_params.input_zeropoint = input_zeropoint; + op_params.input_scale = input_scale; + op_params.inputs_count = inputs_count; + op_params.output_zeropoint = output_zeropoint; + op_params.output_scale = output_scale; + + ConcatenationWithScaling(op_params, input_shapes_indirect.data(), input_data, + DimsToShape(output_dims), output_data); +} + +template +void DepthConcatenation(const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + // For now we don't have a model with a Concatenation with fused activation. 
+ TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); + + std::vector input_shapes(inputs_count); + std::vector input_shapes_indirect(inputs_count); + for (int i = 0; i < inputs_count; ++i) { + ShapeFromDims(*input_dims[i], &input_shapes[i]); + input_shapes_indirect[i] = &input_shapes[i]; + } + tflite::ConcatenationParams op_params; + op_params.inputs_count = inputs_count; + + DepthConcatenation(op_params, input_shapes_indirect.data(), input_data, + DimsToShape(output_dims), output_data); +} + +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int axis, int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + std::vector output_shapes(outputs_count); + std::vector output_shapes_indirect(outputs_count); + for (int i = 0; i < outputs_count; ++i) { + ShapeFromDims(*output_dims[i], &output_shapes[i]); + output_shapes_indirect[i] = &output_shapes[i]; + } + tflite::SplitParams op_params; + op_params.axis = 3 - axis; + op_params.num_split = outputs_count; + + Split(op_params, DimsToShape(input_dims), input_data, + output_shapes_indirect.data(), output_data); +} + +template +void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, + int outputs_count, Scalar* const* output_data, + const Dims<4>* const* output_dims) { + TFLITE_DCHECK_GE(outputs_count, 1); + for (int i = 0; i < outputs_count; i++) { + /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3); + /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); + /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); + } + // For now we don't have a model with a Split with fused activation. 
+ TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); + + TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count, + output_data, output_dims); +} + +inline void Softmax(const float* input_data, const RuntimeShape& input_shape, + float beta, float* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.beta = beta; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_beta_multiplier, int32 input_beta_left_shift, + int diff_min, uint8* output_data, + const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_beta_multiplier; + params.input_left_shift = input_beta_left_shift; + params.diff_min = diff_min; + Softmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, + float* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + // No params currently used for float LogSoftmax. 
+ LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_multiplier, int32 input_left_shift, + int32 reverse_scaling_divisor, + int32 reverse_scaling_right_shift, int diff_min, + uint8* output_data, const RuntimeShape& output_shape) { + SoftmaxParams params; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + params.reverse_scaling_divisor = reverse_scaling_divisor; + params.reverse_scaling_right_shift = reverse_scaling_right_shift; + params.diff_min = diff_min; + LogSoftmax(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + LogisticParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, + const RuntimeShape& output_shape, int16* output_data) { + LogisticParams params; + // No params currently needed by int16 Logistic. 
+ Logistic(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, + int32 input_zero_point, int32 input_range_radius, + int32 input_multiplier, int input_left_shift, + uint8* output_data, const RuntimeShape& output_shape) { + TanhParams params; + params.input_zero_point = input_zero_point; + params.input_range_radius = input_range_radius; + params.input_multiplier = input_multiplier; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, + int input_left_shift, int16* output_data, + const RuntimeShape& output_shape) { + TanhParams params; + params.input_left_shift = input_left_shift; + Tanh(params, input_shape, input_data, output_shape, output_data); +} + +inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, + int32 zero_point, double scale, float* output_data, + const Dims<4>& output_dims) { + tflite::DequantizationParams op_params; + op_params.zero_point = zero_point; + op_params.scale = scale; + + Dequantize(op_params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); +} + +inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, + float rmin, float rmax, int num_bits, float* output_data, + const Dims<4>& output_dims) { + tflite::FakeQuantParams op_params; + op_params.num_bits = num_bits; + op_params.minmax.min = rmin; + op_params.minmax.max = rmax; + + FakeQuant(op_params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_data); +} + +template +inline void Gather(const T* input_data, const Dims<4>& input_dims, + int input_rank, const int32* coords_data, + const Dims<4>& coords_dims, T* output_data, + const Dims<4>& output_dims) { + tflite::GatherParams op_params; + op_params.input_rank = input_rank; + + Gather(op_params, DimsToShape(input_dims), 
input_data, + DimsToShape(coords_dims), coords_data, DimsToShape(output_dims), + output_data); +} + +inline uint32 LegacyReverseBits32(uint32 n) { + n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); + n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); + n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); + return (((n & 0xFF) << 24) | ((n & 0xFF00) << 8) | ((n & 0xFF0000) >> 8) | + ((n & 0xFF000000) >> 24)); +} + +inline void StridedSliceReverseIndices(tflite::StridedSliceParams* p) { + TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count); + TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count); + + std::reverse(p->start_indices, p->start_indices + p->start_indices_count); + std::reverse(p->stop_indices, p->stop_indices + p->stop_indices_count); + std::reverse(p->strides, p->strides + p->strides_count); + + p->begin_mask = LegacyReverseBits32(static_cast(p->begin_mask)) >> + (32 - p->start_indices_count); + p->ellipsis_mask = + LegacyReverseBits32(static_cast(p->ellipsis_mask)) >> + (32 - p->start_indices_count); + p->end_mask = LegacyReverseBits32(static_cast(p->end_mask)) >> + (32 - p->start_indices_count); + p->new_axis_mask = + LegacyReverseBits32(static_cast(p->new_axis_mask)) >> + (32 - p->start_indices_count); + p->shrink_axis_mask = + LegacyReverseBits32(static_cast(p->shrink_axis_mask)) >> + (32 - p->start_indices_count); +} + +template +inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, + int begin_mask, int end_mask, int shrink_axis_mask, + const std::vector& start_indices, + const std::vector& stop_indices, + const std::vector& strides, T* output_data, + const Dims<4>& output_dims) { + TFLITE_DCHECK_EQ(start_indices.size(), 4); + auto op_params = strided_slice::BuildStridedSliceParams( + begin_mask, end_mask, shrink_axis_mask, start_indices, stop_indices, + strides); + StridedSliceReverseIndices(&op_params); + + StridedSlice(op_params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), 
output_data); +} + +template +inline void Mean(const T* input_data, const Dims<4>& input_dims, + const std::vector& reduction_indices, T* output_data, + const Dims<4>& output_dims) { + tflite::MeanParams op_params; + op_params.axis_count = reduction_indices.size(); + for (int i = 0; i < op_params.axis_count; ++i) { + op_params.axis[i] = reduction_indices[op_params.axis_count - 1 - i]; + } + + Mean(op_params, DimsToShape(input_dims), input_data, DimsToShape(output_dims), + output_data); +} + +template +void Transpose(const T* input, const Dims<4>& input_dims, T* output, + const Dims<4>& output_dims, const int* permuted_axes) { + TransposeParams params; + params.perm_count = 4; + for (int i = 0; i < 4; ++i) { + params.perm[i] = 3 - permuted_axes[3 - i]; + } + Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims), + output); +} + +template F> +inline void Comparison(const T* input1_data, const Dims<4>& input1_dims, + const T* input2_data, const Dims<4>& input2_dims, + bool* output_data, const Dims<4>& output_dims) { + ComparisonParams op_params; + // No parameters needed. + ComparisonImpl(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data); +} + +template F> +inline void Comparison(int left_shift, const T* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const T* input2_data, const Dims<4>& input2_dims, + int32 input2_offset, int32 input2_multiplier, + int input2_shift, bool* output_data, + const Dims<4>& output_dims) { + tflite::ComparisonParams op_params; + op_params.left_shift = left_shift; + op_params.input1_offset = input1_offset; + op_params.input1_multiplier = input1_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.input1_shift = kReverseShift * input1_shift; + op_params.input2_offset = input2_offset; + op_params.input2_multiplier = input2_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.input2_shift = kReverseShift * input2_shift; + + ComparisonWithScaling(op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, + DimsToShape(output_dims), output_data); +} + +template F> +inline void BroadcastComparison(const T* input1_data, + const Dims<4>& input1_dims, + const T* input2_data, + const Dims<4>& input2_dims, bool* output_data, + const Dims<4>& output_dims) { + ComparisonParams op_params; + // No parameters needed. + BroadcastComparison4DSlowImpl(op_params, DimsToShape(input1_dims), + input1_data, DimsToShape(input2_dims), + input2_data, DimsToShape(output_dims), + output_data); +} + +template F> +inline void BroadcastComparison(int left_shift, const T* input1_data, + const Dims<4>& input1_dims, int32 input1_offset, + int32 input1_multiplier, int input1_shift, + const T* input2_data, + const Dims<4>& input2_dims, int32 input2_offset, + int32 input2_multiplier, int input2_shift, + bool* output_data, const Dims<4>& output_dims) { + ComparisonParams op_params; + + op_params.left_shift = left_shift; + op_params.input1_offset = input1_offset; + op_params.input1_multiplier = input1_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. + op_params.input1_shift = kReverseShift * input1_shift; + op_params.input2_offset = input2_offset; + op_params.input2_multiplier = input2_multiplier; + // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
+ op_params.input2_shift = kReverseShift * input2_shift; + + BroadcastComparison4DSlowWithScaling( + op_params, DimsToShape(input1_dims), input1_data, + DimsToShape(input2_dims), input2_data, DimsToShape(output_dims), + output_data); +} + +#define TFLITE_LEGACY_COMPARISON_OP(name) \ + template \ + inline void name(const T* input1_data, const Dims<4>& input1_dims, \ + const T* input2_data, const Dims<4>& input2_dims, \ + bool* output_data, const Dims<4>& output_dims) { \ + gemmlowp::ScopedProfilingLabel label(#name); \ + Comparison(input1_data, input1_dims, input2_data, \ + input2_dims, output_data, output_dims); \ + } \ + template \ + inline void name( \ + int left_shift, const T* input1_data, const Dims<4>& input1_dims, \ + int32 input1_offset, int32 input1_multiplier, int input1_shift, \ + const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset, \ + int32 input2_multiplier, int input2_shift, bool* output_data, \ + const Dims<4>& output_dims) { \ + gemmlowp::ScopedProfilingLabel label(#name "/8bit"); \ + Comparison(left_shift, input1_data, input1_dims, \ + input1_offset, input1_multiplier, input1_shift, \ + input2_data, input2_dims, input2_offset, \ + input2_multiplier, input2_shift, output_data, \ + output_dims); \ + } \ + template \ + inline void Broadcast##name( \ + const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, \ + const Dims<4>& input2_dims, bool* output_data, \ + const Dims<4>& output_dims) { \ + gemmlowp::ScopedProfilingLabel label("Broadcast" #name); \ + BroadcastComparison(input1_data, input1_dims, input2_data, \ + input2_dims, output_data, output_dims); \ + } \ + template \ + inline void Broadcast##name( \ + int left_shift, const T* input1_data, const Dims<4>& input1_dims, \ + int32 input1_offset, int32 input1_multiplier, int input1_shift, \ + const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset, \ + int32 input2_multiplier, int input2_shift, bool* output_data, \ + const Dims<4>& output_dims) { \ + 
gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit"); \ + BroadcastComparison(left_shift, input1_data, input1_dims, \ + input1_offset, input1_multiplier, \ + input1_shift, input2_data, input2_dims, \ + input2_offset, input2_multiplier, \ + input2_shift, output_data, output_dims); \ + } +TFLITE_LEGACY_COMPARISON_OP(Equal); +TFLITE_LEGACY_COMPARISON_OP(NotEqual); +TFLITE_LEGACY_COMPARISON_OP(Greater); +TFLITE_LEGACY_COMPARISON_OP(GreaterEqual); +TFLITE_LEGACY_COMPARISON_OP(Less); +TFLITE_LEGACY_COMPARISON_OP(LessEqual); +#undef TFLITE_LEGACY_COMPARISON_OP + +template +inline void Select(const D* input_condition_data, + const Dims<4>& input_condition_dims, const T* input_x_data, + const Dims<4>& input_x_dims, const T* input_y_data, + const Dims<4>& input_y_dims, T* output_data, + const Dims<4>& output_dims) { + Select(DimsToShape(input_condition_dims), input_condition_data, + DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims), + input_y_data, DimsToShape(output_dims), output_data); +} + +template +inline void RankOneSelect(const D* input_condition_data, + const Dims<4>& input_condition_dims, + const T* input_x_data, const Dims<4>& input_x_dims, + const T* input_y_data, const Dims<4>& input_y_dims, + T* output_data, const Dims<4>& output_dims) { + RankOneSelect(DimsToShape(input_condition_dims), input_condition_data, + DimsToShape(input_x_dims), input_x_data, + DimsToShape(input_y_dims), input_y_data, + DimsToShape(output_dims), output_data); +} + +template +inline void SparseToDense(const std::vector>& indices, + const T* values, T default_value, T* output_data, + const Dims<4>& output_dims, bool value_is_scalar) { + SparseToDense(indices, values, default_value, value_is_scalar, + DimsToShape(output_dims), output_data); +} + +template +void Pack(int dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, int inputs_count, + Scalar* output_data, const Dims<4>& output_dims) { + std::vector input_shapes(inputs_count); + 
std::vector input_shapes_indirect(inputs_count); + for (int i = 0; i < inputs_count; ++i) { + ShapeFromDims(*input_dims[i], &input_shapes[i]); + input_shapes_indirect[i] = &input_shapes[i]; + } + tflite::PackParams op_params; + op_params.axis = 3 - dim; + op_params.inputs_count = inputs_count; + + Pack(op_params, input_shapes_indirect.data(), input_data, + DimsToShape(output_dims), output_data); +} + +template +void Unpack(int axis, const Scalar* input_data, const Dims<4>& input_dims, + int dimensions, int outputs_count, Scalar* const* output_datas, + const Dims<4>& output_dims) { + tflite::UnpackParams op_params; + op_params.axis = 3 - axis; + op_params.num_split = outputs_count; + + Unpack(op_params, DimsToShape(input_dims), input_data, + DimsToShape(output_dims), output_datas); +} + +template +void Pack(int dim, const Scalar* const* input_data, + const Dims<4>* const* input_dims, const int32* input_zeropoint, + const float* input_scale, int inputs_count, Scalar* output_data, + const Dims<4>& output_dims, const int32 output_zeropoint, + const float output_scale) { + std::vector input_shapes(inputs_count); + std::vector input_shapes_indirect(inputs_count); + for (int i = 0; i < inputs_count; ++i) { + ShapeFromDims(*input_dims[i], &input_shapes[i]); + input_shapes_indirect[i] = &input_shapes[i]; + } + tflite::PackParams op_params; + op_params.axis = 3 - dim; + op_params.input_zeropoint = input_zeropoint; + op_params.input_scale = input_scale; + op_params.inputs_count = inputs_count; + op_params.output_zeropoint = output_zeropoint; + op_params.output_scale = output_scale; + + PackWithScaling(op_params, input_shapes_indirect.data(), input_data, + DimsToShape(output_dims), output_data); +} + template void L2Normalization(const float* input_data, const RuntimeShape& input_shape, float* output_data, const RuntimeShape& output_shape) { @@ -342,7 +1408,6 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims, DimsToShape(output_dims), output_data); 
} -// Legacy. // Transitional version that will be moved shortly to legacy_reference_ops, as // part of RuntimeShape revisions. inline void BroadcastMul4DSlow(const uint8* input1_data, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index f3f1595035..59f17ae854 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -231,83 +231,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - float output_activation_min, float output_activation_max, - float* output_data, const Dims<4>& output_dims, - float* im2col_data, const Dims<4>& im2col_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.float_activation_min = output_activation_min; - op_params.float_activation_max = output_activation_max; - - Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), - filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride_width, - int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - float* output_data, const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, - stride_width, stride_height, dilation_width_factor, - dilation_height_factor, pad_width, pad_height, output_activation_min, - output_activation_max, output_data, output_dims, im2col_data, - im2col_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride_width, - int stride_height, int pad_width, int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - float output_activation_min, output_activation_max; - GetActivationMinMax(Ac, &output_activation_min, &output_activation_max); - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, bias_dims, - stride_width, stride_height, 1, 1, pad_width, pad_height, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// legacy, for compatibility with old checked-in code -template -void Conv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - const float* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - Conv(input_data, input_dims, filter_data, filter_dims, bias_data, - bias_dims, stride, stride, 1, 1, pad_width, pad_height, output_data, - output_dims, im2col_data, im2col_dims); -} - inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& filter_shape, const uint8* filter_data, const RuntimeShape& bias_shape, @@ -391,111 +314,6 @@ inline void Conv(const ConvParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int dilation_width_factor, - int dilation_height_factor, int pad_width, int pad_height, - int32 output_offset, int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, - uint8* im2col_data, const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. 
- op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - op_params.dilation_width_factor = dilation_width_factor; - op_params.dilation_height_factor = dilation_height_factor; - op_params.input_offset = input_offset; - op_params.weights_offset = filter_offset; - op_params.output_offset = output_offset; - op_params.output_multiplier = output_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.output_shift = kReverseShift * output_shift; - op_params.quantized_activation_min = output_activation_min; - op_params.quantized_activation_max = output_activation_max; - - Conv(op_params, DimsToShape(input_dims), input_data, DimsToShape(filter_dims), - filter_data, DimsToShape(bias_dims), bias_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - Conv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, stride_height, 1, 1, - pad_width, pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -inline void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, int32 output_offset, int32 output_multiplier, - int output_shift, int32 output_activation_min, - int32 output_activation_max, uint8* output_data, - const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, - gemmlowp::GemmContext* gemm_context) { - static_assert(Ac == FusedActivationFunctionType::kNone || - Ac == FusedActivationFunctionType::kRelu || - Ac == FusedActivationFunctionType::kRelu6 || - Ac == FusedActivationFunctionType::kRelu1, - ""); - if (Ac == FusedActivationFunctionType::kNone) { - TFLITE_DCHECK_EQ(output_activation_min, 0); - TFLITE_DCHECK_EQ(output_activation_max, 255); - } - Conv(input_data, input_dims, input_offset, 
filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride_width, stride_height, - pad_width, pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, output_dims, - im2col_data, im2col_dims, gemm_context); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// legacy, for compatibility with old checked-in code -template -void Conv(const uint8* input_data, const Dims<4>& input_dims, - int32 input_offset, const uint8* filter_data, - const Dims<4>& filter_dims, int32 filter_offset, - const int32* bias_data, const Dims<4>& bias_dims, int stride, - int pad_width, int pad_height, int32 output_offset, - int32 output_multiplier, int output_shift, - int32 output_activation_min, int32 output_activation_max, - uint8* output_data, const Dims<4>& output_dims, uint8* im2col_data, - const Dims<4>& im2col_dims, gemmlowp::GemmContext* gemm_context) { - Conv(input_data, input_dims, input_offset, filter_data, filter_dims, - filter_offset, bias_data, bias_dims, stride, stride, pad_width, - pad_height, output_offset, output_multiplier, output_shift, - output_activation_min, output_activation_max, output_data, - output_dims, im2col_data, im2col_dims, gemm_context); -} - template inline void DepthToSpace(const tflite::DepthToSpaceParams& op_params, const RuntimeShape& unextended_input_shape, @@ -1385,21 +1203,6 @@ void BroadcastDiv4DSlow(const ArithmeticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. 
-template -void BroadcastDiv(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T output_activation_min, T output_activation_max, - T* output_data, const Dims<4>& output_dims) { - tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); - - BroadcastDiv4DSlow(op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, - DimsToShape(output_dims), output_data); -} - template inline void Div(const ArithmeticParams& params, const RuntimeShape& input1_shape, const T* input1_data, @@ -1418,21 +1221,6 @@ inline void Div(const ArithmeticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -template -inline void Div(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - T output_activation_min, T output_activation_max, - T* output_data, const Dims<4>& output_dims) { - tflite::ArithmeticParams op_params; - SetActivationParams(output_activation_min, output_activation_max, &op_params); - - Div(op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, DimsToShape(output_dims), - output_data); -} - inline void SubNonBroadcast(const ArithmeticParams& params, const RuntimeShape& input1_shape, const float* input1_data, @@ -1772,34 +1560,10 @@ inline void Concatenation(const ConcatenationParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -template -inline void Concatenation(int concat_dim, const Scalar* const* input_data, - const Dims<4>* const* input_dims, int inputs_count, - Scalar* output_data, const Dims<4>& output_dims) { - // For now we don't have a model with a Concatenation with fused activation. 
- TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); - - std::vector input_shapes(inputs_count); - std::vector input_shapes_indirect(inputs_count); - for (int i = 0; i < inputs_count; ++i) { - ShapeFromDims(*input_dims[i], &input_shapes[i]); - input_shapes_indirect[i] = &input_shapes[i]; - } - tflite::ConcatenationParams op_params; - op_params.axis = 3 - concat_dim; - op_params.inputs_count = inputs_count; - - Concatenation(op_params, input_shapes_indirect.data(), input_data, - DimsToShape(output_dims), output_data); -} - // TODO(prabhumk): This is the same as the optimized implementation. // TODO(prabhumk): The quantized implementation of concatentation isn't fully // quantized as it takes scale as a floating point value. This should be fixed // when optimizng this routine further. - inline void ConcatenationWithScaling(const ConcatenationParams& params, const RuntimeShape* const* input_shapes, const uint8* const* input_data, @@ -1862,33 +1626,6 @@ inline void ConcatenationWithScaling(const ConcatenationParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. 
-inline void Concatenation(int concat_dim, const uint8* const* input_data, - const Dims<4>* const* input_dims, - const int32* input_zeropoint, - const float* input_scale, int inputs_count, - uint8* output_data, const Dims<4>& output_dims, - const int32 output_zeropoint, - const float output_scale) { - std::vector input_shapes(inputs_count); - std::vector input_shapes_indirect(inputs_count); - for (int i = 0; i < inputs_count; ++i) { - ShapeFromDims(*input_dims[i], &input_shapes[i]); - input_shapes_indirect[i] = &input_shapes[i]; - } - tflite::ConcatenationParams op_params; - op_params.axis = 3 - concat_dim; - op_params.input_zeropoint = input_zeropoint; - op_params.input_scale = input_scale; - op_params.inputs_count = inputs_count; - op_params.output_zeropoint = output_zeropoint; - op_params.output_scale = output_scale; - - ConcatenationWithScaling(op_params, input_shapes_indirect.data(), input_data, - DimsToShape(output_dims), output_data); -} - template void Pack(const PackParams& params, const RuntimeShape* const* input_shapes, const Scalar* const* input_data, const RuntimeShape& output_shape, @@ -2002,26 +1739,6 @@ void DepthConcatenation(const ConcatenationParams& params, output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -template -void DepthConcatenation(const Scalar* const* input_data, - const Dims<4>* const* input_dims, int inputs_count, - Scalar* output_data, const Dims<4>& output_dims) { - // For now we don't have a model with a Concatenation with fused activation. 
- TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); - std::vector input_shapes(inputs_count); - std::vector input_shapes_indirect(inputs_count); - for (int i = 0; i < inputs_count; ++i) { - ShapeFromDims(*input_dims[i], &input_shapes[i]); - input_shapes_indirect[i] = &input_shapes[i]; - } - tflite::ConcatenationParams op_params; - op_params.inputs_count = inputs_count; - - DepthConcatenation(op_params, input_shapes_indirect.data(), input_data, - DimsToShape(output_dims), output_data); -} - inline void LstmCell( const LstmCellParams& params, const RuntimeShape& unextended_input_shape, const float* input_data, const RuntimeShape& unextended_prev_activ_shape, @@ -2139,31 +1856,6 @@ inline void LstmCell( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void LstmCell(const float* input_data, const Dims<4>& input_dims, - const float* prev_activ_data, - const Dims<4>& prev_activ_dims, const float* weights_data, - const Dims<4>& weights_dims, const float* bias_data, - const Dims<4>& bias_dims, const float* prev_state_data, - const Dims<4>& prev_state_dims, float* output_state_data, - const Dims<4>& output_state_dims, float* output_activ_data, - const Dims<4>& output_activ_dims, float* concat_temp_data, - const Dims<4>& concat_temp_dims, float* activ_temp_data, - const Dims<4>& activ_temp_dims) { - tflite::LstmCellParams op_params; - // Float LSTM cell does not need parameters to be set: leave untouched. - - LstmCell(op_params, DimsToShape(input_dims), input_data, - DimsToShape(prev_activ_dims), prev_activ_data, - DimsToShape(weights_dims), weights_data, DimsToShape(bias_dims), - bias_data, DimsToShape(prev_state_dims), prev_state_data, - DimsToShape(output_state_dims), output_state_data, - DimsToShape(output_activ_dims), output_activ_data, - DimsToShape(concat_temp_dims), concat_temp_data, - DimsToShape(activ_temp_dims), activ_temp_data); -} - // Quantized LSTM cell implementation. 
// The quantization of the input, output arrays is as follows: // - The input activations are quantized as uint8 on the interval @@ -2438,37 +2130,6 @@ inline void LstmCell( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims, - const uint8* prev_activ_data_uint8, - const Dims<4>& prev_activ_dims, const uint8* weights_data_uint8, - const Dims<4>& weights_dims, const int32* bias_data_int32, - const Dims<4>& bias_dims, const int16* prev_state_data_int16, - const Dims<4>& prev_state_dims, int16* output_state_data_int16, - const Dims<4>& output_state_dims, uint8* output_activ_data_uint8, - const Dims<4>& output_activ_dims, uint8* concat_temp_data_uint8, - const Dims<4>& concat_temp_dims, int16* activ_temp_data_int16, - const Dims<4>& activ_temp_dims, int32 weights_zero_point, - int32 accum_multiplier, int accum_shift, - gemmlowp::GemmContext* gemm_context) { - tflite::LstmCellParams op_params; - op_params.weights_zero_point = weights_zero_point; - op_params.accum_multiplier = accum_multiplier; - op_params.accum_shift = accum_shift; - - LstmCell( - op_params, DimsToShape(input_dims), input_data_uint8, - DimsToShape(prev_activ_dims), prev_activ_data_uint8, - DimsToShape(weights_dims), weights_data_uint8, DimsToShape(bias_dims), - bias_data_int32, DimsToShape(prev_state_dims), prev_state_data_int16, - DimsToShape(output_state_dims), output_state_data_int16, - DimsToShape(output_activ_dims), output_activ_data_uint8, - DimsToShape(concat_temp_dims), concat_temp_data_uint8, - DimsToShape(activ_temp_dims), activ_temp_data_int16, gemm_context); -} - template void Split(const SplitParams& params, const RuntimeShape& input_shape, const Scalar* input_data, const RuntimeShape* const* output_shapes, @@ -2511,45 +2172,6 @@ void Split(const SplitParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// Legacy Dims<4>. -template -void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, - int axis, int outputs_count, Scalar* const* output_data, - const Dims<4>* const* output_dims) { - std::vector output_shapes(outputs_count); - std::vector output_shapes_indirect(outputs_count); - for (int i = 0; i < outputs_count; ++i) { - ShapeFromDims(*output_dims[i], &output_shapes[i]); - output_shapes_indirect[i] = &output_shapes[i]; - } - tflite::SplitParams op_params; - op_params.axis = 3 - axis; - op_params.num_split = outputs_count; - - Split(op_params, DimsToShape(input_dims), input_data, - output_shapes_indirect.data(), output_data); -} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -template -void TensorFlowSplit(const Scalar* input_data, const Dims<4>& input_dims, - int outputs_count, Scalar* const* output_data, - const Dims<4>* const* output_dims) { - TFLITE_DCHECK_GE(outputs_count, 1); - for (int i = 0; i < outputs_count; i++) { - /* batches = */ MatchingArraySize(*output_dims[i], 3, input_dims, 3); - /* height = */ MatchingArraySize(*output_dims[i], 2, input_dims, 2); - /* width = */ MatchingArraySize(*output_dims[i], 1, input_dims, 1); - } - // For now we don't have a model with a Split with fused activation. - TFLITE_DCHECK_EQ(Ac, FusedActivationFunctionType::kNone); - - TensorFlowSplit(input_data, input_dims, /*axis=*/0, outputs_count, - output_data, output_dims); -} - inline int NodeOffset(int b, int h, int w, int height, int width) { return (b * height + h) * width + w; } @@ -2880,15 +2502,6 @@ inline void LogSoftmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy -inline void LogSoftmax(const float* input_data, const RuntimeShape& input_shape, - float* output_data, const RuntimeShape& output_shape) { - SoftmaxParams params; - // No params currently used for float LogSoftmax. 
- LogSoftmax(params, input_shape, input_data, output_shape, output_data); -} - // Although currently the name of this function says that it cannot handle // values less than 1, in practice it can handle as low as 1/x_max, where // x_max is the largest representable input. In other words, the output range @@ -3093,22 +2706,6 @@ inline void LogSoftmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void LogSoftmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_multiplier, int32 input_left_shift, - int32 reverse_scaling_divisor, - int32 reverse_scaling_right_shift, int diff_min, - uint8* output_data, const RuntimeShape& output_shape) { - SoftmaxParams params; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - params.reverse_scaling_divisor = reverse_scaling_divisor; - params.reverse_scaling_right_shift = reverse_scaling_right_shift; - params.diff_min = diff_min; - LogSoftmax(params, input_shape, input_data, output_shape, output_data); -} - inline void Logistic(const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -3170,20 +2767,6 @@ inline void Logistic(const LogisticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Logistic(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { - LogisticParams params; - params.input_zero_point = input_zero_point; - params.input_range_radius = input_range_radius; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - Logistic(params, input_shape, input_data, output_shape, output_data); -} - inline void Logistic(const LogisticParams& params, const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { @@ -3203,15 +2786,6 @@ inline void Logistic(const LogisticParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Logistic(const RuntimeShape& input_shape, const int16* input_data, - const RuntimeShape& output_shape, int16* output_data) { - LogisticParams params; - // No params currently needed by int16 Logistic. - Logistic(params, input_shape, input_data, output_shape, output_data); -} - inline void Tanh(const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { const int flat_size = MatchingFlatSize(input_shape, output_shape); @@ -3275,20 +2849,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-inline void Tanh(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_zero_point, int32 input_range_radius, - int32 input_multiplier, int input_left_shift, - uint8* output_data, const RuntimeShape& output_shape) { - TanhParams params; - params.input_zero_point = input_zero_point; - params.input_range_radius = input_range_radius; - params.input_multiplier = input_multiplier; - params.input_left_shift = input_left_shift; - Tanh(params, input_shape, input_data, output_shape, output_data); -} - inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, const int16* input_data, const RuntimeShape& output_shape, int16* output_data) { @@ -3323,16 +2883,6 @@ inline void Tanh(const TanhParams& params, const RuntimeShape& input_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Tanh(const int16* input_data, const RuntimeShape& input_shape, - int input_left_shift, int16* output_data, - const RuntimeShape& output_shape) { - TanhParams params; - params.input_left_shift = input_left_shift; - Tanh(params, input_shape, input_data, output_shape, output_data); -} - inline void Dequantize(const tflite::DequantizationParams& op_params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -3347,19 +2897,6 @@ inline void Dequantize(const tflite::DequantizationParams& op_params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. 
-inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, - int32 zero_point, double scale, float* output_data, - const Dims<4>& output_dims) { - tflite::DequantizationParams op_params; - op_params.zero_point = zero_point; - op_params.scale = scale; - - Dequantize(op_params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); -} - inline void FakeQuant(const tflite::FakeQuantParams& op_params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& output_shape, float* output_data) { @@ -3383,20 +2920,6 @@ inline void FakeQuant(const tflite::FakeQuantParams& op_params, output_data, flat_size); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -inline void FakeQuant(const float* input_data, const Dims<4>& input_dims, - float rmin, float rmax, int num_bits, float* output_data, - const Dims<4>& output_dims) { - tflite::FakeQuantParams op_params; - op_params.num_bits = num_bits; - op_params.minmax.min = rmin; - op_params.minmax.max = rmax; - - FakeQuant(op_params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); -} - template inline void Cast(const RuntimeShape& input_shape, const SrcT* input_data, const RuntimeShape& output_shape, DstT* output_data) { @@ -3456,23 +2979,6 @@ inline void Gather(const tflite::GatherParams& op_params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4> version. -// When moving legacy ops to legacy_reference_ops, replace content with looser -// implementation. 
-template -inline void Gather(const T* input_data, const Dims<4>& input_dims, - int input_rank, const int32* coords_data, - const Dims<4>& coords_dims, T* output_data, - const Dims<4>& output_dims) { - tflite::GatherParams op_params; - op_params.input_rank = input_rank; - - Gather(op_params, DimsToShape(input_dims), input_data, - DimsToShape(coords_dims), coords_data, DimsToShape(output_dims), - output_data); -} - template inline void ResizeBilinear(const tflite::ResizeBilinearParams& op_params, const RuntimeShape& unextended_input_shape, @@ -3802,58 +3308,6 @@ inline void StridedSlice(const tflite::StridedSliceParams& op_params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline uint32 LegacyReverseBits32(uint32 n) { - n = ((n >> 1) & 0x55555555) | ((n & 0x55555555) << 1); - n = ((n >> 2) & 0x33333333) | ((n & 0x33333333) << 2); - n = ((n >> 4) & 0x0F0F0F0F) | ((n & 0x0F0F0F0F) << 4); - return (((n & 0xFF) << 24) | ((n & 0xFF00) << 8) | ((n & 0xFF0000) >> 8) | - ((n & 0xFF000000) >> 24)); -} - -inline void StridedSliceReverseIndices(tflite::StridedSliceParams* p) { - TFLITE_CHECK_EQ(p->start_indices_count, p->stop_indices_count); - TFLITE_CHECK_EQ(p->stop_indices_count, p->strides_count); - - std::reverse(p->start_indices, p->start_indices + p->start_indices_count); - std::reverse(p->stop_indices, p->stop_indices + p->stop_indices_count); - std::reverse(p->strides, p->strides + p->strides_count); - - p->begin_mask = LegacyReverseBits32(static_cast(p->begin_mask)) >> - (32 - p->start_indices_count); - p->ellipsis_mask = - LegacyReverseBits32(static_cast(p->ellipsis_mask)) >> - (32 - p->start_indices_count); - p->end_mask = LegacyReverseBits32(static_cast(p->end_mask)) >> - (32 - p->start_indices_count); - p->new_axis_mask = - LegacyReverseBits32(static_cast(p->new_axis_mask)) >> - (32 - p->start_indices_count); - p->shrink_axis_mask = - LegacyReverseBits32(static_cast(p->shrink_axis_mask)) >> - (32 - p->start_indices_count); 
-} - -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -inline void StridedSlice(const T* input_data, const Dims<4>& input_dims, - int begin_mask, int end_mask, int shrink_axis_mask, - const std::vector& start_indices, - const std::vector& stop_indices, - const std::vector& strides, T* output_data, - const Dims<4>& output_dims) { - TFLITE_DCHECK_EQ(start_indices.size(), 4); - auto op_params = strided_slice::BuildStridedSliceParams( - begin_mask, end_mask, shrink_axis_mask, start_indices, stop_indices, - strides); - StridedSliceReverseIndices(&op_params); - - StridedSlice(op_params, DimsToShape(input_dims), input_data, - DimsToShape(output_dims), output_data); -} - template inline void Slice(const tflite::SliceParams& op_params, const RuntimeShape& input_shape, const T* input_data, @@ -4119,22 +3573,6 @@ inline void Mean(const tflite::MeanParams& op_params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy Dims<4>. -template -inline void Mean(const T* input_data, const Dims<4>& input_dims, - const std::vector& reduction_indices, T* output_data, - const Dims<4>& output_dims) { - tflite::MeanParams op_params; - op_params.axis_count = reduction_indices.size(); - for (int i = 0; i < op_params.axis_count; ++i) { - op_params.axis[i] = reduction_indices[op_params.axis_count - 1 - i]; - } - - Mean(op_params, DimsToShape(input_dims), input_data, DimsToShape(output_dims), - output_data); -} - // Computes the mean of elements across dimensions given in axis. // It does so in two stages, first calculates the sum of elements along the axis // then divides it by the number of element in axis for quantized values. @@ -4392,20 +3830,6 @@ void Transpose(const TransposeParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-template -void Transpose(const T* input, const Dims<4>& input_dims, T* output, - const Dims<4>& output_dims, const int* permuted_axes) { - TransposeParams params; - params.perm_count = 4; - for (int i = 0; i < 4; ++i) { - params.perm[i] = 3 - permuted_axes[3 - i]; - } - Transpose(params, DimsToShape(input_dims), input, DimsToShape(output_dims), - output); -} - inline void TransposeConv( const ConvParams& params, const RuntimeShape& input_shape, const float* input_data, const RuntimeShape& filter_shape, @@ -4479,27 +3903,6 @@ inline void TransposeConv( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, - const float* filter_data, const Dims<4>& filter_dims, - int stride_width, int stride_height, int pad_width, - int pad_height, float* output_data, - const Dims<4>& output_dims, float* im2col_data, - const Dims<4>& im2col_dims) { - tflite::ConvParams op_params; - // Padding type is ignored, but still set. - op_params.padding_type = PaddingType::kSame; - op_params.padding_values.width = pad_width; - op_params.padding_values.height = pad_height; - op_params.stride_width = stride_width; - op_params.stride_height = stride_height; - - TransposeConv(op_params, DimsToShape(input_dims), input_data, - DimsToShape(filter_dims), filter_data, DimsToShape(output_dims), - output_data, DimsToShape(im2col_dims), im2col_data); -} - template inline bool EqualFn(T lhs, T rhs) { return lhs == rhs; @@ -4553,19 +3956,6 @@ inline void Comparison(const ComparisonParams& op_params, input2_data, output_shape, output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template F> -inline void Comparison(const T* input1_data, const Dims<4>& input1_dims, - const T* input2_data, const Dims<4>& input2_dims, - bool* output_data, const Dims<4>& output_dims) { - ComparisonParams op_params; - // No parameters needed. 
- ComparisonImpl(op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, - DimsToShape(output_dims), output_data); -} - template F> inline void ComparisonWithScaling( const ComparisonParams& op_params, const RuntimeShape& input1_shape, @@ -4596,32 +3986,6 @@ inline void ComparisonWithScaling( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template F> -inline void Comparison(int left_shift, const T* input1_data, - const Dims<4>& input1_dims, int32 input1_offset, - int32 input1_multiplier, int input1_shift, - const T* input2_data, const Dims<4>& input2_dims, - int32 input2_offset, int32 input2_multiplier, - int input2_shift, bool* output_data, - const Dims<4>& output_dims) { - tflite::ComparisonParams op_params; - op_params.left_shift = left_shift; - op_params.input1_offset = input1_offset; - op_params.input1_multiplier = input1_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.input1_shift = kReverseShift * input1_shift; - op_params.input2_offset = input2_offset; - op_params.input2_multiplier = input2_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.input2_shift = kReverseShift * input2_shift; - - ComparisonWithScaling(op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, - DimsToShape(output_dims), output_data); -} - template F> inline void BroadcastComparison4DSlowImpl( const ComparisonParams& op_params, @@ -4665,22 +4029,6 @@ inline void BroadcastComparison4DSlow(const ComparisonParams& op_params, output_shape, output_data); } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. 
-template F> -inline void BroadcastComparison(const T* input1_data, - const Dims<4>& input1_dims, - const T* input2_data, - const Dims<4>& input2_dims, bool* output_data, - const Dims<4>& output_dims) { - ComparisonParams op_params; - // No parameters needed. - BroadcastComparison4DSlowImpl(op_params, DimsToShape(input1_dims), - input1_data, DimsToShape(input2_dims), - input2_data, DimsToShape(output_dims), - output_data); -} - template F> inline void BroadcastComparison4DSlowWithScaling( const ComparisonParams& op_params, @@ -4731,80 +4079,7 @@ inline void BroadcastComparison4DSlowWithScaling( } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template F> -inline void BroadcastComparison(int left_shift, const T* input1_data, - const Dims<4>& input1_dims, int32 input1_offset, - int32 input1_multiplier, int input1_shift, - const T* input2_data, - const Dims<4>& input2_dims, int32 input2_offset, - int32 input2_multiplier, int input2_shift, - bool* output_data, const Dims<4>& output_dims) { - ComparisonParams op_params; - - op_params.left_shift = left_shift; - op_params.input1_offset = input1_offset; - op_params.input1_multiplier = input1_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. - op_params.input1_shift = kReverseShift * input1_shift; - op_params.input2_offset = input2_offset; - op_params.input2_multiplier = input2_multiplier; - // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
- op_params.input2_shift = kReverseShift * input2_shift; - - BroadcastComparison4DSlowWithScaling( - op_params, DimsToShape(input1_dims), input1_data, - DimsToShape(input2_dims), input2_data, DimsToShape(output_dims), - output_data); -} - #define TFLITE_COMPARISON_OP(name) \ - template \ - inline void name(const T* input1_data, const Dims<4>& input1_dims, \ - const T* input2_data, const Dims<4>& input2_dims, \ - bool* output_data, const Dims<4>& output_dims) { \ - gemmlowp::ScopedProfilingLabel label(#name); \ - Comparison(input1_data, input1_dims, input2_data, \ - input2_dims, output_data, output_dims); \ - } \ - template \ - inline void name( \ - int left_shift, const T* input1_data, const Dims<4>& input1_dims, \ - int32 input1_offset, int32 input1_multiplier, int input1_shift, \ - const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset, \ - int32 input2_multiplier, int input2_shift, bool* output_data, \ - const Dims<4>& output_dims) { \ - gemmlowp::ScopedProfilingLabel label(#name "/8bit"); \ - Comparison(left_shift, input1_data, input1_dims, \ - input1_offset, input1_multiplier, input1_shift, \ - input2_data, input2_dims, input2_offset, \ - input2_multiplier, input2_shift, output_data, \ - output_dims); \ - } \ - template \ - inline void Broadcast##name( \ - const T* input1_data, const Dims<4>& input1_dims, const T* input2_data, \ - const Dims<4>& input2_dims, bool* output_data, \ - const Dims<4>& output_dims) { \ - gemmlowp::ScopedProfilingLabel label("Broadcast" #name); \ - BroadcastComparison(input1_data, input1_dims, input2_data, \ - input2_dims, output_data, output_dims); \ - } \ - template \ - inline void Broadcast##name( \ - int left_shift, const T* input1_data, const Dims<4>& input1_dims, \ - int32 input1_offset, int32 input1_multiplier, int input1_shift, \ - const T* input2_data, const Dims<4>& input2_dims, int32 input2_offset, \ - int32 input2_multiplier, int input2_shift, bool* output_data, \ - const Dims<4>& output_dims) { \ - 
gemmlowp::ScopedProfilingLabel label("Broadcast" #name "/8bit"); \ - BroadcastComparison(left_shift, input1_data, input1_dims, \ - input1_offset, input1_multiplier, \ - input1_shift, input2_data, input2_dims, \ - input2_offset, input2_multiplier, \ - input2_shift, output_data, output_dims); \ - } \ inline void name(const ComparisonParams& op_params, \ const RuntimeShape& input1_shape, const float* input1_data, \ const RuntimeShape& input2_shape, const float* input2_data, \ @@ -4889,19 +4164,6 @@ void Select(const RuntimeShape& input_condition_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -inline void Select(const D* input_condition_data, - const Dims<4>& input_condition_dims, const T* input_x_data, - const Dims<4>& input_x_dims, const T* input_y_data, - const Dims<4>& input_y_dims, T* output_data, - const Dims<4>& output_dims) { - Select(DimsToShape(input_condition_dims), input_condition_data, - DimsToShape(input_x_dims), input_x_data, DimsToShape(input_y_dims), - input_y_data, DimsToShape(output_dims), output_data); -} - template void RankOneSelect(const RuntimeShape& input_condition_shape, const D* input_condition_data, @@ -4923,20 +4185,6 @@ void RankOneSelect(const RuntimeShape& input_condition_shape, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -inline void RankOneSelect(const D* input_condition_data, - const Dims<4>& input_condition_dims, - const T* input_x_data, const Dims<4>& input_x_dims, - const T* input_y_data, const Dims<4>& input_y_dims, - T* output_data, const Dims<4>& output_dims) { - RankOneSelect(DimsToShape(input_condition_dims), input_condition_data, - DimsToShape(input_x_dims), input_x_data, - DimsToShape(input_y_dims), input_y_data, - DimsToShape(output_dims), output_data); -} - // For easy implementation, the indices is always a vector of size-4 vectors. 
template inline void SparseToDense(const std::vector>& indices, @@ -4978,16 +4226,6 @@ inline void SparseToDense(const std::vector>& indices, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -template -inline void SparseToDense(const std::vector>& indices, - const T* values, T default_value, T* output_data, - const Dims<4>& output_dims, bool value_is_scalar) { - SparseToDense(indices, values, default_value, value_is_scalar, - DimsToShape(output_dims), output_data); -} - template inline void Pow(const RuntimeShape& input1_shape, const T* input1_data, const RuntimeShape& input2_shape, const T* input2_data, diff --git a/tensorflow/contrib/lite/kernels/internal/reference/softmax.h b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h index 006174e8db..7d44296134 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/softmax.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/softmax.h @@ -57,16 +57,6 @@ inline void Softmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. -// Legacy. -inline void Softmax(const float* input_data, const RuntimeShape& input_shape, - float beta, float* output_data, - const RuntimeShape& output_shape) { - SoftmaxParams params; - params.beta = beta; - Softmax(params, input_shape, input_data, output_shape, output_data); -} - inline void Softmax(const SoftmaxParams& params, const RuntimeShape& input_shape, const uint8* input_data, const RuntimeShape& output_shape, uint8* output_data) { @@ -151,19 +141,6 @@ inline void Softmax(const SoftmaxParams& params, } } -// TODO(b/80418076): Move to legacy ops file, update invocations. 
-// Legacy -inline void Softmax(const uint8* input_data, const RuntimeShape& input_shape, - int32 input_beta_multiplier, int32 input_beta_left_shift, - int diff_min, uint8* output_data, - const RuntimeShape& output_shape) { - SoftmaxParams params; - params.input_multiplier = input_beta_multiplier; - params.input_left_shift = input_beta_left_shift; - params.diff_min = diff_min; - Softmax(params, input_shape, input_data, output_shape, output_data); -} - // Performs softmax along the input of size (input_size * batch_size). inline void Softmax(const float* in, const int input_size, const int batch_size, const float beta, float* out) { -- GitLab From 370d385c3029a7972ba201c8303942b30f09521c Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 27 Sep 2018 20:52:53 -0700 Subject: [PATCH 123/570] Creating a LinearModel that works with V2 feature columns. In subsequent change I'll change canned estimators to support FeatureColumn V2 and use this LinearModel. PiperOrigin-RevId: 214882241 --- .../feature_column/feature_column_v2.py | 574 ++--- .../feature_column/feature_column_v2_test.py | 2042 ++++------------- 2 files changed, 597 insertions(+), 2019 deletions(-) diff --git a/tensorflow/python/feature_column/feature_column_v2.py b/tensorflow/python/feature_column/feature_column_v2.py index 538641c251..a8d5bfb437 100644 --- a/tensorflow/python/feature_column/feature_column_v2.py +++ b/tensorflow/python/feature_column/feature_column_v2.py @@ -136,14 +136,11 @@ import six from tensorflow.python.eager import context -from tensorflow.python.feature_column import feature_column as fc_old from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape -from tensorflow.python.keras.engine import training from tensorflow.python.keras.engine.base_layer import Layer -from tensorflow.python.layers import base from 
tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -153,7 +150,6 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variable_scope @@ -245,28 +241,19 @@ class StateManager(object): raise NotImplementedError('StateManager.get_resource') -class _InputLayerStateManager(StateManager): - """Manages the state of InputLayer.""" +class _StateManagerImpl(StateManager): + """Manages the state of FeatureLayer and LinearModel.""" - def __init__(self, layer, feature_columns, trainable): - """Creates an _InputLayerStateManager object. + def __init__(self, layer, trainable): + """Creates an _StateManagerImpl object. Args: layer: The input layer this state manager is associated with. - feature_columns: List of feature columns for the input layer trainable: Whether by default, variables created are trainable or not. 
""" self._trainable = trainable self._layer = layer - self._cols_to_vars_map = {} - self._cols_to_names_map = {} - for column in sorted(feature_columns, key=lambda x: x.name): - self._cols_to_vars_map[column] = {} - base_name = column.name - if isinstance(column, SharedEmbeddingColumn): - base_name = column.shared_collection_name - with variable_scope.variable_scope(base_name) as vs: - self._cols_to_names_map[column] = _strip_leading_slashes(vs.name) + self._cols_to_vars_map = collections.defaultdict(lambda: {}) def create_variable(self, feature_column, @@ -277,19 +264,19 @@ class _InputLayerStateManager(StateManager): initializer=None): if name in self._cols_to_vars_map[feature_column]: raise ValueError('Variable already exists.') - with variable_scope.variable_scope(self._cols_to_names_map[feature_column]): - var = self._layer.add_variable( - name=name, - shape=shape, - dtype=dtype, - initializer=initializer, - trainable=self._trainable and trainable, - # TODO(rohanj): Get rid of this hack once we have a mechanism for - # specifying a default partitioner for an entire layer. In that case, - # the default getter for Layers should work. - getter=variable_scope.get_variable) - self._cols_to_vars_map[feature_column][name] = var - return var + + var = self._layer.add_variable( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, + trainable=self._trainable and trainable, + # TODO(rohanj): Get rid of this hack once we have a mechanism for + # specifying a default partitioner for an entire layer. In that case, + # the default getter for Layers should work. + getter=variable_scope.get_variable) + self._cols_to_vars_map[feature_column][name] = var + return var def get_variable(self, feature_column, name): if name in self._cols_to_vars_map[feature_column]: @@ -313,12 +300,15 @@ class FeatureLayer(Layer): keywords_embedded = embedding_column( categorical_column_with_hash_bucket("keywords", 10K), dimensions=16) columns = [price, keywords_embedded, ...] 
- features = tf.parse_example(..., features=make_parse_example_spec(columns)) feature_layer = FeatureLayer(columns) + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) dense_tensor = feature_layer(features) for units in [128, 64, 32]: dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu) - prediction = tf.layers.dense(dense_tensor, 1).""" + prediction = tf.layers.dense(dense_tensor, 1). + ``` + """ def __init__(self, feature_columns, @@ -375,8 +365,7 @@ class FeatureLayer(Layer): super(FeatureLayer, self).__init__(name=name, trainable=trainable, **kwargs) self._feature_columns = _normalize_feature_columns(feature_columns) - self._state_manager = _InputLayerStateManager(self, self._feature_columns, - self.trainable) + self._state_manager = _StateManagerImpl(self, self.trainable) self._shared_state_manager = shared_state_manager for column in sorted(self._feature_columns, key=lambda x: x.name): if not isinstance(column, DenseColumn): @@ -395,7 +384,8 @@ class FeatureLayer(Layer): column.create_state(self._shared_state_manager) else: with variable_scope.variable_scope(None, default_name=self.name): - column.create_state(self._state_manager) + with variable_scope.variable_scope(None, default_name=column.name): + column.create_state(self._state_manager) super(FeatureLayer, self).build(None) def call(self, features, cols_to_output_tensors=None): @@ -448,20 +438,18 @@ class FeatureLayer(Layer): return (input_shape[0], total_elements) -def linear_model(features, - feature_columns, - units=1, - sparse_combiner='sum', - weight_collections=None, - trainable=True, - cols_to_vars=None): - """Returns a linear prediction `Tensor` based on given `feature_columns`. +def _strip_leading_slashes(name): + return name.rsplit('/', 1)[-1] + + +class LinearModel(Layer): + """Produces a linear prediction `Tensor` based on given `feature_columns`. - This function generates a weighted sum based on output dimension `units`. 
+ This layer generates a weighted sum based on output dimension `units`. Weighted sum refers to logits in classification problems. It refers to the prediction itself for linear regression problems. - Note on supported columns: `linear_model` treats categorical columns as + Note on supported columns: `LinearModel` treats categorical columns as `indicator_column`s. To be specific, assume the input as `SparseTensor` looks like: @@ -486,308 +474,189 @@ def linear_model(features, keywords = categorical_column_with_hash_bucket("keywords", 10K) keywords_price = crossed_column('keywords', price_buckets, ...) columns = [price_buckets, keywords, keywords_price ...] + linear_model = LinearModel(columns) + features = tf.parse_example(..., features=make_parse_example_spec(columns)) - prediction = linear_model(features, columns) + prediction = linear_model(features) ``` - - Args: - features: A mapping from key to tensors. `_FeatureColumn`s look up via these - keys. For example `numeric_column('price')` will look at 'price' key in - this dict. Values are `Tensor` or `SparseTensor` depending on - corresponding `_FeatureColumn`. - feature_columns: An iterable containing the FeatureColumns to use as inputs - to your model. All items should be instances of classes derived from - `_FeatureColumn`s. - units: An integer, dimensionality of the output space. Default value is 1. - sparse_combiner: A string specifying how to reduce if a categorical column - is multivalent. Except `numeric_column`, almost all columns passed to - `linear_model` are considered as categorical columns. It combines each - categorical column independently. Currently "mean", "sqrtn" and "sum" are - supported, with "sum" the default for linear model. "sqrtn" often achieves - good accuracy, in particular with bag-of-words columns. 
- * "sum": do not normalize features in the column - * "mean": do l1 normalization on features in the column - * "sqrtn": do l2 normalization on features in the column - For example, for two features represented as the categorical columns: - - ```python - # Feature 1 - - shape = [2, 2] - { - [0, 0]: "a" - [0, 1]: "b" - [1, 0]: "c" - } - - # Feature 2 - - shape = [2, 3] - { - [0, 0]: "d" - [1, 0]: "e" - [1, 1]: "f" - [1, 2]: "g" - } - ``` - with `sparse_combiner` as "mean", the linear model outputs conceptly are: - ``` - y_0 = 1.0 / 2.0 * ( w_a + w_ b) + w_c + b_0 - y_1 = w_d + 1.0 / 3.0 * ( w_e + w_ f + w_g) + b_1 - ``` - where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight - assigned to the presence of `x` in the input features. - weight_collections: A list of collection names to which the Variable will be - added. Note that, variables will also be added to collections - `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. - trainable: If `True` also add the variable to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - cols_to_vars: If not `None`, must be a dictionary that will be filled with a - mapping from `_FeatureColumn` to associated list of `Variable`s. For - example, after the call, we might have cols_to_vars = { - _NumericColumn( - key='numeric_feature1', shape=(1,): - [], - 'bias': [], - _NumericColumn( - key='numeric_feature2', shape=(2,)): - []} - If a column creates no variables, its value will be an empty list. Note - that cols_to_vars will also contain a string key 'bias' that maps to a - list of Variables. - - Returns: - A `Tensor` which represents predictions/logits of a linear model. Its shape - is (batch_size, units) and its dtype is `float32`. - - Raises: - ValueError: if an item in `feature_columns` is neither a `_DenseColumn` - nor `_CategoricalColumn`. 
- """ - with variable_scope.variable_scope(None, 'linear_model') as vs: - model_name = _strip_leading_slashes(vs.name) - linear_model_layer = _LinearModel( - feature_columns=feature_columns, - units=units, - sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, - name=model_name) - retval = linear_model_layer(features) # pylint: disable=not-callable - if cols_to_vars is not None: - cols_to_vars.update(linear_model_layer.cols_to_vars()) - return retval - - -def _add_to_collections(var, weight_collections): - """Adds a var to the list of weight_collections provided. - - Handles the case for partitioned and non-partitioned variables. - - Args: - var: A variable or Partitioned Variable. - weight_collections: List of collections to add variable to. - """ - for weight_collection in weight_collections: - # The layer self.add_variable call already adds it to GLOBAL_VARIABLES. - if weight_collection == ops.GraphKeys.GLOBAL_VARIABLES: - continue - # TODO(rohanj): Explore adding a _get_variable_list method on `Variable` - # so that we don't have to do this check. - if isinstance(var, variables.PartitionedVariable): - for constituent_var in list(var): - ops.add_to_collection(weight_collection, constituent_var) - else: - ops.add_to_collection(weight_collection, var) - - -class _FCLinearWrapper(base.Layer): - """Wraps a _FeatureColumn in a layer for use in a linear model. - - See `linear_model` above. """ def __init__(self, - feature_column, + feature_columns, units=1, sparse_combiner='sum', - weight_collections=None, trainable=True, name=None, + shared_state_manager=None, **kwargs): - super(_FCLinearWrapper, self).__init__( - trainable=trainable, name=name, **kwargs) - self._feature_column = feature_column - self._units = units - self._sparse_combiner = sparse_combiner - self._weight_collections = weight_collections + """Constructs a LinearModel. 
- def build(self, _): - if isinstance(self._feature_column, fc_old._CategoricalColumn): # pylint: disable=protected-access - weight = self.add_variable( - name='weights', - shape=(self._feature_column._num_buckets, self._units), # pylint: disable=protected-access - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) - else: - num_elements = self._feature_column._variable_shape.num_elements() # pylint: disable=protected-access - weight = self.add_variable( - name='weights', - shape=[num_elements, self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) - _add_to_collections(weight, self._weight_collections) - self._weight_var = weight - self.built = True - - def call(self, builder): - weighted_sum = fc_old._create_weighted_sum( # pylint: disable=protected-access - column=self._feature_column, - builder=builder, - units=self._units, - sparse_combiner=self._sparse_combiner, - weight_collections=self._weight_collections, - trainable=self.trainable, - weight_var=self._weight_var) - return weighted_sum + Args: + feature_columns: An iterable containing the FeatureColumns to use as + inputs to your model. All items should be instances of classes derived + from `_FeatureColumn`s. + units: An integer, dimensionality of the output space. Default value is 1. + sparse_combiner: A string specifying how to reduce if a categorical column + is multivalent. Except `numeric_column`, almost all columns passed to + `linear_model` are considered as categorical columns. It combines each + categorical column independently. Currently "mean", "sqrtn" and "sum" + are supported, with "sum" the default for linear model. "sqrtn" often + achieves good accuracy, in particular with bag-of-words columns. 
+ * "sum": do not normalize features in the column + * "mean": do l1 normalization on features in the column + * "sqrtn": do l2 normalization on features in the column + For example, for two features represented as the categorical columns: + + ```python + # Feature 1 + + shape = [2, 2] + { + [0, 0]: "a" + [0, 1]: "b" + [1, 0]: "c" + } + + # Feature 2 + + shape = [2, 3] + { + [0, 0]: "d" + [1, 0]: "e" + [1, 1]: "f" + [1, 2]: "g" + } + ``` + + with `sparse_combiner` as "mean", the linear model outputs conceptly are + ``` + y_0 = 1.0 / 2.0 * ( w_a + w_ b) + w_c + b_0 + y_1 = w_d + 1.0 / 3.0 * ( w_e + w_ f + w_g) + b_1 + ``` + where `y_i` is the output, `b_i` is the bias, and `w_x` is the weight + assigned to the presence of `x` in the input features. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + name: Name to give to the Linear Model. All variables and ops created will + be scoped by this name. + shared_state_manager: SharedEmbeddingStateManager that manages the state + of SharedEmbeddingColumns. For more info, look at `FeatureLayer`. + **kwargs: Keyword arguments to construct a layer. + Raises: + ValueError: if an item in `feature_columns` is neither a `DenseColumn` + nor `CategoricalColumn`. + """ + super(LinearModel, self).__init__(name=name, trainable=trainable, **kwargs) -class _BiasLayer(base.Layer): - """A layer for the bias term. - """ + self._feature_columns = _normalize_feature_columns(feature_columns) + self._feature_columns = sorted(self._feature_columns, key=lambda x: x.name) + for column in self._feature_columns: + if not isinstance(column, (DenseColumn, CategoricalColumn)): + raise ValueError( + 'Items of feature_columns must be either a ' + 'DenseColumn or CategoricalColumn. 
Given: {}'.format(column)) - def __init__(self, - units=1, - trainable=True, - weight_collections=None, - name=None, - **kwargs): - super(_BiasLayer, self).__init__(trainable=trainable, name=name, **kwargs) self._units = units - self._weight_collections = weight_collections - - def build(self, _): - self._bias_variable = self.add_variable( - 'bias_weights', - shape=[self._units], - initializer=init_ops.zeros_initializer(), - trainable=self.trainable) - _add_to_collections(self._bias_variable, self._weight_collections) - self.built = True - - def call(self, _): - return self._bias_variable + self._sparse_combiner = sparse_combiner + self._state_manager = _StateManagerImpl(self, self.trainable) + self._shared_state_manager = shared_state_manager + self._bias_variable = None -def _get_expanded_variable_list(var_list): - returned_list = [] - for variable in var_list: - if (isinstance(variable, variables.Variable) or - resource_variable_ops.is_resource_variable(variable)): - returned_list.append(variable) # Single variable case. - else: # Must be a PartitionedVariable, so convert into a list. - returned_list.extend(list(variable)) - return returned_list + def build(self, _): + # Create state for shared embedding columns. + for column in self._feature_columns: + if isinstance(column, SharedEmbeddingColumn): + column.create_state(self._shared_state_manager) + # We need variable scopes for now because we want the variable partitioning + # information to percolate down. We also use _pure_variable_scope's here + # since we want to open up a name_scope in the `call` method while creating + # the ops. 
+ with variable_scope._pure_variable_scope(self.name): # pylint: disable=protected-access + for column in self._feature_columns: + with variable_scope._pure_variable_scope(column.name): # pylint: disable=protected-access + # Create the state for each feature column + if not isinstance(column, SharedEmbeddingColumn): + column.create_state(self._state_manager) + + # Create a weight variable for each column. + if isinstance(column, CategoricalColumn): + first_dim = column.num_buckets + else: + first_dim = column.variable_shape.num_elements() + self._state_manager.create_variable( + column, + name='weights', + dtype=dtypes.float32, + shape=(first_dim, self._units), + initializer=init_ops.zeros_initializer(), + trainable=self.trainable) + + # Create a bias variable. + self._bias_variable = self.add_variable( + name='bias_weights', + dtype=dtypes.float32, + shape=[self._units], + initializer=init_ops.zeros_initializer(), + trainable=self.trainable, + # TODO(rohanj): Get rid of this hack once we have a mechanism for + # specifying a default partitioner for an entire layer. In that case, + # the default getter for Layers should work. + getter=variable_scope.get_variable) -def _strip_leading_slashes(name): - return name.rsplit('/', 1)[-1] + super(LinearModel, self).build(None) + def call(self, features): + """Returns a `Tensor` the represents the predictions of a linear model. -class _LinearModel(training.Model): - """Creates a linear model using feature columns. + Args: + features: A mapping from key to tensors. `_FeatureColumn`s look up via + these keys. For example `numeric_column('price')` will look at 'price' + key in this dict. Values are `Tensor` or `SparseTensor` depending on + corresponding `_FeatureColumn`. - See `linear_model` for details. - """ + Returns: + A `Tensor` which represents predictions/logits of a linear model. Its + shape is (batch_size, units) and its dtype is `float32`. 
- def __init__(self, - feature_columns, - units=1, - sparse_combiner='sum', - weight_collections=None, - trainable=True, - name=None, - **kwargs): - super(_LinearModel, self).__init__(name=name, **kwargs) - self._feature_columns = fc_old._normalize_feature_columns( # pylint: disable=protected-access - feature_columns) - self._weight_collections = list(weight_collections or []) - if ops.GraphKeys.GLOBAL_VARIABLES not in self._weight_collections: - self._weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) - if ops.GraphKeys.MODEL_VARIABLES not in self._weight_collections: - self._weight_collections.append(ops.GraphKeys.MODEL_VARIABLES) - - column_layers = {} - for column in sorted(self._feature_columns, key=lambda x: x.name): - with variable_scope.variable_scope( - None, default_name=column._var_scope_name) as vs: # pylint: disable=protected-access - # Having the fully expressed variable scope name ends up doubly - # expressing the outer scope (scope with which this method was called) - # in the name of the variable that would get created. - column_name = _strip_leading_slashes(vs.name) - column_layer = _FCLinearWrapper(column, units, sparse_combiner, - self._weight_collections, trainable, - column_name, **kwargs) - column_layers[column_name] = column_layer - self._column_layers = self._add_layers(column_layers) - self._bias_layer = _BiasLayer( - units=units, - trainable=trainable, - weight_collections=self._weight_collections, - name='bias_layer', - **kwargs) - self._cols_to_vars = {} - - def cols_to_vars(self): - """Returns a dict mapping _FeatureColumns to variables. - - See `linear_model` for more information. - This is not populated till `call` is called i.e. layer is built. + Raises: + ValueError: If features are not a dictionary. 
""" - return self._cols_to_vars - - def call(self, features): - with variable_scope.variable_scope(self.name): - for column in self._feature_columns: - if not isinstance( - column, - ( - fc_old._DenseColumn, # pylint: disable=protected-access - fc_old._CategoricalColumn)): # pylint: disable=protected-access - raise ValueError( - 'Items of feature_columns must be either a ' - '_DenseColumn or _CategoricalColumn. Given: {}'.format(column)) - weighted_sums = [] - ordered_columns = [] - builder = fc_old._LazyBuilder(features) # pylint: disable=protected-access - for layer in sorted(self._column_layers.values(), key=lambda x: x.name): - column = layer._feature_column # pylint: disable=protected-access - ordered_columns.append(column) - weighted_sum = layer(builder) + if not isinstance(features, dict): + raise ValueError('We expected a dictionary here. Instead we got: ', + features) + transformation_cache = FeatureTransformationCache(features) + weighted_sums = [] + for column in self._feature_columns: + with ops.name_scope(column.name): + # All the weights used in the linear model are owned by the state + # manager associated with this Linear Model. + weight_var = self._state_manager.get_variable(column, 'weights') + + # The embedding weights for the SharedEmbeddingColumn are owned by + # the shared_state_manager and so we need to pass that in while + # creating the weighted sum. For all other columns, the state is owned + # by the Linear Model's state manager. 
+ if isinstance(column, SharedEmbeddingColumn): + state_manager = self._shared_state_manager + else: + state_manager = self._state_manager + weighted_sum = _create_weighted_sum( + column=column, + transformation_cache=transformation_cache, + state_manager=state_manager, + sparse_combiner=self._sparse_combiner, + weight_var=weight_var) weighted_sums.append(weighted_sum) - self._cols_to_vars[column] = ops.get_collection( - ops.GraphKeys.GLOBAL_VARIABLES, scope=layer.scope_name) - - _verify_static_batch_size_equality(weighted_sums, ordered_columns) - predictions_no_bias = math_ops.add_n( - weighted_sums, name='weighted_sum_no_bias') - predictions = nn_ops.bias_add( - predictions_no_bias, - self._bias_layer( # pylint: disable=not-callable - builder, - scope=variable_scope.get_variable_scope()), # pylint: disable=not-callable - name='weighted_sum') - bias = self._bias_layer.variables[0] - self._cols_to_vars['bias'] = _get_expanded_variable_list([bias]) - return predictions - def _add_layers(self, layers): - # "Magic" required for keras.Model classes to track all the variables in - # a list of layers.Layer objects. - # TODO(ashankar): Figure out API so user code doesn't have to do this. 
- for name, layer in layers.items(): - setattr(self, 'layer-%s' % name, layer) - return layers + _verify_static_batch_size_equality(weighted_sums, self._feature_columns) + predictions_no_bias = math_ops.add_n( + weighted_sums, name='weighted_sum_no_bias') + predictions = nn_ops.bias_add( + predictions_no_bias, self._bias_variable, name='weighted_sum') + return predictions def _transform_features(features, feature_columns, state_manager): @@ -2053,58 +1922,32 @@ def is_feature_column_v2(feature_columns): return True -def _create_weighted_sum(column, - transformation_cache, - state_manager, - units, - sparse_combiner, - weight_collections, - trainable, - weight_var=None): +def _create_weighted_sum(column, transformation_cache, state_manager, + sparse_combiner, weight_var): """Creates a weighted sum for a dense/categorical column for linear_model.""" if isinstance(column, CategoricalColumn): return _create_categorical_column_weighted_sum( column=column, transformation_cache=transformation_cache, state_manager=state_manager, - units=units, sparse_combiner=sparse_combiner, - weight_collections=weight_collections, - trainable=trainable, weight_var=weight_var) else: return _create_dense_column_weighted_sum( column=column, transformation_cache=transformation_cache, state_manager=state_manager, - units=units, - weight_collections=weight_collections, - trainable=trainable, weight_var=weight_var) -def _create_dense_column_weighted_sum(column, - transformation_cache, - state_manager, - units, - weight_collections, - trainable, - weight_var=None): +def _create_dense_column_weighted_sum(column, transformation_cache, + state_manager, weight_var): """Create a weighted sum of a dense column for linear_model.""" tensor = column.get_dense_tensor(transformation_cache, state_manager) num_elements = column.variable_shape.num_elements() batch_size = array_ops.shape(tensor)[0] tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) - if weight_var is not None: - weight = 
weight_var - else: - weight = variable_scope.get_variable( - name='weights', - shape=[num_elements, units], - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) - return math_ops.matmul(tensor, weight, name='weighted_sum') + return math_ops.matmul(tensor, weight_var, name='weighted_sum') class CategoricalColumn(FeatureColumn): @@ -2145,14 +1988,8 @@ class CategoricalColumn(FeatureColumn): pass -def _create_categorical_column_weighted_sum(column, - transformation_cache, - state_manager, - units, - sparse_combiner, - weight_collections, - trainable, - weight_var=None): +def _create_categorical_column_weighted_sum( + column, transformation_cache, state_manager, sparse_combiner, weight_var): # pylint: disable=g-doc-return-or-yield,g-doc-args """Create a weighted sum of a categorical column for linear_model. @@ -2191,17 +2028,8 @@ def _create_categorical_column_weighted_sum(column, weight_tensor = sparse_ops.sparse_reshape( weight_tensor, [array_ops.shape(weight_tensor)[0], -1]) - if weight_var is not None: - weight = weight_var - else: - weight = variable_scope.get_variable( - name='weights', - shape=(column.num_buckets, units), - initializer=init_ops.zeros_initializer(), - trainable=trainable, - collections=weight_collections) return _safe_embedding_lookup_sparse( - weight, + weight_var, id_tensor, sparse_weights=weight_tensor, combiner=sparse_combiner, @@ -2836,6 +2664,10 @@ class SharedEmbeddingColumn( def create_state(self, state_manager): """Creates the shared embedding lookup variable.""" + if not isinstance(state_manager, SharedEmbeddingStateManager): + raise ValueError('Expected state_manager to be of type ' + 'SharedEmbeddingStateManager. 
Obtained type: {}'.format( + type(state_manager))) embedding_shape = (self.categorical_column.num_buckets, self.dimension) state_manager.create_variable( name=self.shared_collection_name, diff --git a/tensorflow/python/feature_column/feature_column_v2_test.py b/tensorflow/python/feature_column/feature_column_v2_test.py index 2970431167..a13a5010e1 100644 --- a/tensorflow/python/feature_column/feature_column_v2_test.py +++ b/tensorflow/python/feature_column/feature_column_v2_test.py @@ -31,9 +31,7 @@ from tensorflow.python.client import session from tensorflow.python.eager import backprop from tensorflow.python.eager import context from tensorflow.python.estimator.inputs import numpy_io -from tensorflow.python.feature_column import feature_column as fc_old from tensorflow.python.feature_column import feature_column_v2 as fc -from tensorflow.python.feature_column.feature_column_v2 import _LinearModel from tensorflow.python.feature_column.feature_column_v2 import _transform_features from tensorflow.python.feature_column.feature_column_v2 import FeatureColumn from tensorflow.python.feature_column.feature_column_v2 import FeatureLayer @@ -48,7 +46,6 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import parsing_ops -from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test @@ -360,26 +357,12 @@ class NumericColumnTest(test.TestCase): self.assertEqual(a.default_value, ((3., 2.),)) def test_linear_model(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - 
self.assertAllClose([0.], bias.eval()) - self.assertAllClose([[0.]], price_var.eval()) - self.assertAllClose([[0.], [0.]], predictions.eval()) - sess.run(price_var.assign([[10.]])) - self.assertAllClose([[10.], [50.]], predictions.eval()) - - def test_keras_linear_model(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - predictions = get_keras_linear_model_predictions(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price]) + predictions = model(features) + price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) self.assertAllClose([[0.]], price_var.eval()) @@ -564,13 +547,13 @@ class BucketizedColumnTest(test.TestCase): def test_linear_model_one_input_value(self): """Tests linear_model() for input with shape=[1].""" - price = fc_old.numeric_column('price', shape=[1]) - bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6]) + price = fc.numeric_column('price', shape=[1]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): features = {'price': [[-1.], [1.], [5.], [6.]]} - predictions = fc.linear_model(features, [bucketized_price]) - bias = get_linear_model_bias() - bucketized_price_var = get_linear_model_column_var(bucketized_price) + model = fc.LinearModel([bucketized_price]) + predictions = model(features) + bucketized_price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) # One weight variable per bucket, all initialized to zero. 
@@ -589,13 +572,13 @@ class BucketizedColumnTest(test.TestCase): def test_linear_model_two_input_values(self): """Tests linear_model() for input with shape=[2].""" - price = fc_old.numeric_column('price', shape=[2]) - bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6]) + price = fc.numeric_column('price', shape=[2]) + bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): features = {'price': [[-1., 1.], [5., 6.]]} - predictions = fc.linear_model(features, [bucketized_price]) - bias = get_linear_model_bias() - bucketized_price_var = get_linear_model_column_var(bucketized_price) + model = fc.LinearModel([bucketized_price]) + predictions = model(features) + bucketized_price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) # One weight per bucket per input column, all initialized to zero. @@ -616,62 +599,6 @@ class BucketizedColumnTest(test.TestCase): sess.run(bias.assign([1.])) self.assertAllClose([[81.], [141.]], predictions.eval()) - def test_keras_linear_model_one_input_value(self): - """Tests _LinearModel for input with shape=[1].""" - price = fc_old.numeric_column('price', shape=[1]) - bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6]) - with ops.Graph().as_default(): - features = {'price': [[-1.], [1.], [5.], [6.]]} - predictions = get_keras_linear_model_predictions(features, - [bucketized_price]) - bias = get_linear_model_bias() - bucketized_price_var = get_linear_model_column_var(bucketized_price) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - # One weight variable per bucket, all initialized to zero. - self.assertAllClose([[0.], [0.], [0.], [0.], [0.]], - bucketized_price_var.eval()) - self.assertAllClose([[0.], [0.], [0.], [0.]], predictions.eval()) - sess.run( - bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.]])) - # price -1. 
is in the 0th bucket, whose weight is 10. - # price 1. is in the 1st bucket, whose weight is 20. - # price 5. is in the 3rd bucket, whose weight is 40. - # price 6. is in the 4th bucket, whose weight is 50. - self.assertAllClose([[10.], [20.], [40.], [50.]], predictions.eval()) - sess.run(bias.assign([1.])) - self.assertAllClose([[11.], [21.], [41.], [51.]], predictions.eval()) - - def test_keras_linear_model_two_input_values(self): - """Tests _LinearModel for input with shape=[2].""" - price = fc_old.numeric_column('price', shape=[2]) - bucketized_price = fc_old.bucketized_column(price, boundaries=[0, 2, 4, 6]) - with ops.Graph().as_default(): - features = {'price': [[-1., 1.], [5., 6.]]} - predictions = get_keras_linear_model_predictions(features, - [bucketized_price]) - bias = get_linear_model_bias() - bucketized_price_var = get_linear_model_column_var(bucketized_price) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - # One weight per bucket per input column, all initialized to zero. - self.assertAllClose( - [[0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.], [0.]], - bucketized_price_var.eval()) - self.assertAllClose([[0.], [0.]], predictions.eval()) - sess.run( - bucketized_price_var.assign([[10.], [20.], [30.], [40.], [50.], - [60.], [70.], [80.], [90.], [100.]])) - # 1st example: - # price -1. is in the 0th bucket, whose weight is 10. - # price 1. is in the 6th bucket, whose weight is 70. - # 2nd example: - # price 5. is in the 3rd bucket, whose weight is 40. - # price 6. is in the 9th bucket, whose weight is 100. 
- self.assertAllClose([[80.], [140.]], predictions.eval()) - sess.run(bias.assign([1.])) - self.assertAllClose([[81.], [141.]], predictions.eval()) - class HashedCategoricalColumnTest(test.TestCase): @@ -852,39 +779,18 @@ class HashedCategoricalColumnTest(test.TestCase): transformation_cache.get(hashed_sparse, None), id_weight_pair.id_tensor) def test_linear_model(self): - wire_column = fc_old.categorical_column_with_hash_bucket('wire', 4) - self.assertEqual(4, wire_column._num_buckets) - with ops.Graph().as_default(): - predictions = fc.linear_model({ - wire_column.name: sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }, (wire_column,)) - bias = get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() - # 'marlo' -> 3: wire_var[3] = 4 - # 'skywalker' -> 2, 'omar' -> 2: wire_var[2] + wire_var[2] = 3+3 = 6 - self.assertAllClose(((4.,), (6.,)), predictions.eval()) - - def test_keras_linear_model(self): - wire_column = fc_old.categorical_column_with_hash_bucket('wire', 4) - self.assertEqual(4, wire_column._num_buckets) + wire_column = fc.categorical_column_with_hash_bucket('wire', 4) + self.assertEqual(4, wire_column.num_buckets) with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((wire_column,)) + predictions = model({ wire_column.name: sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - }, (wire_column,)) - bias = get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) + }) + wire_var, bias = model.variables with _initialized_session(): 
self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) @@ -1103,93 +1009,12 @@ class CrossedColumnTest(test.TestCase): Uses data from test_get_sparse_tesnsors_simple. """ - a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,)) - b = fc_old.bucketized_column(a, boundaries=(0, 1)) - crossed = fc_old.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - predictions = fc.linear_model({ - 'a': constant_op.constant(((-1., .5), (.5, 1.))), - 'c': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }, (crossed,)) - bias = get_linear_model_bias() - crossed_var = get_linear_model_column_var(crossed) - with _initialized_session() as sess: - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose( - ((0.,), (0.,), (0.,), (0.,), (0.,)), crossed_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) - # Expected ids after cross = (1, 0, 1, 3, 4, 2) - self.assertAllClose(((3.,), (14.,)), predictions.eval()) - sess.run(bias.assign((.1,))) - self.assertAllClose(((3.1,), (14.1,)), predictions.eval()) - - def test_linear_model_with_weights(self): - - class _TestColumnWithWeights(fc_old._CategoricalColumn): - """Produces sparse IDs and sparse weights.""" - - @property - def name(self): - return 'test_column' - - @property - def _parse_example_spec(self): - return { - self.name: parsing_ops.VarLenFeature(dtypes.int32), - '{}_weights'.format(self.name): parsing_ops.VarLenFeature( - dtypes.float32), - } - - @property - def _num_buckets(self): - return 5 - - def _transform_feature(self, inputs): - return (inputs.get(self.name), - inputs.get('{}_weights'.format(self.name))) - - def _get_sparse_tensors(self, inputs, weight_collections=None, - trainable=None): - """Populates both id_tensor and weight_tensor.""" - ids_and_weights = 
inputs.get(self) - return fc_old._CategoricalColumn.IdWeightPair( - id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) - - t = _TestColumnWithWeights() - crossed = fc_old.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, - 'crossed_column does not support weight_tensor.*{}'.format(t.name)): - fc.linear_model({ - t.name: sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[0, 1, 2], - dense_shape=(2, 2)), - '{}_weights'.format(t.name): sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[1., 10., 2.], - dense_shape=(2, 2)), - 'c': sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }, (crossed,)) - - def test_keras_linear_model(self): - """Tests _LinearModel. - - Uses data from test_get_sparse_tesnsors_simple. - """ - a = fc_old.numeric_column('a', dtype=dtypes.int32, shape=(2,)) - b = fc_old.bucketized_column(a, boundaries=(0, 1)) - crossed = fc_old.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) + a = fc.numeric_column('a', dtype=dtypes.int32, shape=(2,)) + b = fc.bucketized_column(a, boundaries=(0, 1)) + crossed = fc.crossed_column([b, 'c'], hash_bucket_size=5, hash_key=5) with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((crossed,)) + predictions = model({ 'a': constant_op.constant(((-1., .5), (.5, 1.))), 'c': @@ -1197,847 +1022,126 @@ class CrossedColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=['cA', 'cB', 'cC'], dense_shape=(2, 2)), - }, (crossed,)) - bias = get_linear_model_bias() - crossed_var = get_linear_model_column_var(crossed) + }) + crossed_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,), (0.,)), - crossed_var.eval()) - self.assertAllClose(((0.,), (0.,)), 
predictions.eval()) - sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) - # Expected ids after cross = (1, 0, 1, 3, 4, 2) - self.assertAllClose(((3.,), (14.,)), predictions.eval()) - sess.run(bias.assign((.1,))) - self.assertAllClose(((3.1,), (14.1,)), predictions.eval()) - - def test_keras_linear_model_with_weights(self): - - class _TestColumnWithWeights(fc_old._CategoricalColumn): - """Produces sparse IDs and sparse weights.""" - - @property - def name(self): - return 'test_column' - - @property - def _parse_example_spec(self): - return { - self.name: - parsing_ops.VarLenFeature(dtypes.int32), - '{}_weights'.format(self.name): - parsing_ops.VarLenFeature(dtypes.float32), - } - - @property - def _num_buckets(self): - return 5 - - def _transform_feature(self, inputs): - return (inputs.get(self.name), - inputs.get('{}_weights'.format(self.name))) - - def _get_sparse_tensors(self, - inputs, - weight_collections=None, - trainable=None): - """Populates both id_tensor and weight_tensor.""" - ids_and_weights = inputs.get(self) - return fc_old._CategoricalColumn.IdWeightPair( - id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) - - t = _TestColumnWithWeights() - crossed = fc_old.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) - with ops.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, - 'crossed_column does not support weight_tensor.*{}'.format(t.name)): - get_keras_linear_model_predictions({ - t.name: - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[0, 1, 2], - dense_shape=(2, 2)), - '{}_weights'.format(t.name): - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=[1., 10., 2.], - dense_shape=(2, 2)), - 'c': - sparse_tensor.SparseTensor( - indices=((0, 0), (1, 0), (1, 1)), - values=['cA', 'cB', 'cC'], - dense_shape=(2, 2)), - }, (crossed,)) - - -def get_linear_model_bias(name='linear_model'): - with variable_scope.variable_scope(name, reuse=True): - return 
variable_scope.get_variable('bias_weights') - - -def get_linear_model_column_var(column, name='linear_model'): - return ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES, - name + '/' + column.name)[0] - - -def get_keras_linear_model_predictions(features, - feature_columns, - units=1, - sparse_combiner='sum', - weight_collections=None, - trainable=True, - cols_to_vars=None): - keras_linear_model = _LinearModel( - feature_columns, - units, - sparse_combiner, - weight_collections, - trainable, - name='linear_model') - retval = keras_linear_model(features) # pylint: disable=not-callable - if cols_to_vars is not None: - cols_to_vars.update(keras_linear_model.cols_to_vars()) - return retval - - -class LinearModelTest(test.TestCase): - - def test_raises_if_empty_feature_columns(self): - with self.assertRaisesRegexp(ValueError, - 'feature_columns must not be empty'): - fc.linear_model(features={}, feature_columns=[]) - - def test_should_be_feature_column(self): - with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'): - fc.linear_model(features={'a': [[0]]}, feature_columns='NotSupported') - - def test_should_be_dense_or_categorical_column(self): - - class NotSupportedColumn(fc_old._FeatureColumn): - - @property - def name(self): - return 'NotSupportedColumn' - - def _transform_feature(self, cache): - pass - - @property - def _parse_example_spec(self): - pass - - with self.assertRaisesRegexp( - ValueError, 'must be either a _DenseColumn or _CategoricalColumn'): - fc.linear_model( - features={'a': [[0]]}, feature_columns=[NotSupportedColumn()]) - - def test_does_not_support_dict_columns(self): - with self.assertRaisesRegexp( - ValueError, 'Expected feature_columns to be iterable, found dict.'): - fc.linear_model( - features={'a': [[0]]}, - feature_columns={'a': fc_old.numeric_column('a')}) - - def test_raises_if_duplicate_name(self): - with self.assertRaisesRegexp( - ValueError, 'Duplicate feature column name found for columns'): - fc.linear_model( - 
features={'a': [[0]]}, - feature_columns=[ - fc_old.numeric_column('a'), - fc_old.numeric_column('a') - ]) - - def test_dense_bias(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - sess.run(price_var.assign([[10.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[15.], [55.]], predictions.eval()) - - def test_sparse_bias(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast]) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval()) - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [10015.]], predictions.eval()) - - def test_dense_and_sparse_bias(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - price = fc_old.numeric_column('price') - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [wire_cast, price]) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - price_var = 
get_linear_model_column_var(price) - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - sess.run(price_var.assign([[10.]])) - self.assertAllClose([[1015.], [10065.]], predictions.eval()) - - def test_dense_and_sparse_column(self): - """When the column is both dense and sparse, uses sparse tensors.""" - - class _DenseAndSparseColumn(fc_old._DenseColumn, fc_old._CategoricalColumn): - - @property - def name(self): - return 'dense_and_sparse_column' - - @property - def _parse_example_spec(self): - return {self.name: parsing_ops.VarLenFeature(self.dtype)} - - def _transform_feature(self, inputs): - return inputs.get(self.name) - - @property - def _variable_shape(self): - raise ValueError('Should not use this method.') - - def _get_dense_tensor(self, inputs, weight_collections=None, - trainable=None): - raise ValueError('Should not use this method.') - - @property - def _num_buckets(self): - return 4 - - def _get_sparse_tensors(self, inputs, weight_collections=None, - trainable=None): - sp_tensor = sparse_tensor.SparseTensor( - indices=[[0, 0], [1, 0], [1, 1]], - values=[2, 0, 3], - dense_shape=[2, 2]) - return fc_old._CategoricalColumn.IdWeightPair(sp_tensor, None) - - dense_and_sparse_column = _DenseAndSparseColumn() - with ops.Graph().as_default(): - sp_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {dense_and_sparse_column.name: sp_tensor} - predictions = fc.linear_model(features, [dense_and_sparse_column]) - bias = get_linear_model_bias() - dense_and_sparse_column_var = get_linear_model_column_var( - dense_and_sparse_column) - with _initialized_session() as sess: - sess.run(dense_and_sparse_column_var.assign( - [[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [10015.]], predictions.eval()) - - def test_dense_multi_output(self): - 
price = fc_old.numeric_column('price') - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} - predictions = fc.linear_model(features, [price], units=3) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), bias.eval()) - self.assertAllClose(np.zeros((1, 3)), price_var.eval()) - sess.run(price_var.assign([[10., 100., 1000.]])) - sess.run(bias.assign([5., 6., 7.])) - self.assertAllClose([[15., 106., 1007.], [55., 506., 5007.]], - predictions.eval()) - - def test_sparse_multi_output(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast], units=3) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), bias.eval()) - self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval()) - sess.run( - wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [ - 1000., 1100., 1200. 
- ], [10000., 11000., 12000.]])) - sess.run(bias.assign([5., 6., 7.])) - self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], - predictions.eval()) - - def test_dense_multi_dimension(self): - price = fc_old.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - predictions = fc.linear_model(features, [price]) - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - self.assertAllClose([[0.], [0.]], price_var.eval()) - sess.run(price_var.assign([[10.], [100.]])) - self.assertAllClose([[210.], [650.]], predictions.eval()) - - def test_sparse_multi_rank(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = array_ops.sparse_placeholder(dtypes.string) - wire_value = sparse_tensor.SparseTensorValue( - values=['omar', 'stringer', 'marlo', 'omar'], # hashed = [2, 0, 3, 2] - indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], - dense_shape=[2, 2, 2]) - features = {'wire_cast': wire_tensor} - predictions = fc.linear_model(features, [wire_cast]) - wire_cast_var = get_linear_model_column_var(wire_cast) - with _initialized_session() as sess: - self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval()) - self.assertAllClose( - np.zeros((2, 1)), - predictions.eval(feed_dict={wire_tensor: wire_value})) - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) self.assertAllClose( - [[1010.], [11000.]], - predictions.eval(feed_dict={wire_tensor: wire_value})) - - def test_sparse_combiner(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = {'wire_cast': wire_tensor} - predictions = fc.linear_model( - features, [wire_cast], sparse_combiner='mean') - bias 
= get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [5010.]], predictions.eval()) - - def test_sparse_combiner_with_negative_weights(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - wire_cast_weights = fc_old.weighted_categorical_column(wire_cast, 'weights') - - with ops.Graph().as_default(): - wire_tensor = sparse_tensor.SparseTensor( - values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] - indices=[[0, 0], [1, 0], [1, 1]], - dense_shape=[2, 2]) - features = { - 'wire_cast': wire_tensor, - 'weights': constant_op.constant([[1., 1., -1.0]]) - } - predictions = fc.linear_model( - features, [wire_cast_weights], sparse_combiner='sum') - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - with _initialized_session() as sess: - sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(bias.assign([5.])) - self.assertAllClose([[1005.], [-9985.]], predictions.eval()) - - def test_dense_multi_dimension_multi_output(self): - price = fc_old.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1., 2.], [5., 6.]]} - predictions = fc.linear_model(features, [price], units=3) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - self.assertAllClose(np.zeros((3,)), bias.eval()) - self.assertAllClose(np.zeros((2, 3)), price_var.eval()) - sess.run(price_var.assign([[1., 2., 3.], [10., 100., 1000.]])) - sess.run(bias.assign([2., 3., 4.])) - self.assertAllClose([[23., 205., 2007.], [67., 613., 6019.]], - predictions.eval()) - - def test_raises_if_shape_mismatch(self): - price = fc_old.numeric_column('price', shape=2) - with ops.Graph().as_default(): - features = {'price': [[1.], [5.]]} 
- with self.assertRaisesRegexp( - Exception, - r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - fc.linear_model(features, [price]) - - def test_dense_reshaping(self): - price = fc_old.numeric_column('price', shape=[1, 2]) - with ops.Graph().as_default(): - features = {'price': [[[1., 2.]], [[5., 6.]]]} - predictions = fc.linear_model(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - self.assertAllClose([[0.], [0.]], price_var.eval()) - self.assertAllClose([[0.], [0.]], predictions.eval()) - sess.run(price_var.assign([[10.], [100.]])) - self.assertAllClose([[210.], [650.]], predictions.eval()) - - def test_dense_multi_column(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [5., 6.]], - 'price2': [[3.], [4.]] - } - predictions = fc.linear_model(features, [price1, price2]) - bias = get_linear_model_bias() - price1_var = get_linear_model_column_var(price1) - price2_var = get_linear_model_column_var(price2) - with _initialized_session() as sess: - self.assertAllClose([0.], bias.eval()) - self.assertAllClose([[0.], [0.]], price1_var.eval()) - self.assertAllClose([[0.]], price2_var.eval()) - self.assertAllClose([[0.], [0.]], predictions.eval()) - sess.run(price1_var.assign([[10.], [100.]])) - sess.run(price2_var.assign([[1000.]])) - sess.run(bias.assign([7.])) - self.assertAllClose([[3217.], [4657.]], predictions.eval()) - - def test_fills_cols_to_vars(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - cols_to_vars = {} - fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) - bias = get_linear_model_bias() - price1_var = 
get_linear_model_column_var(price1) - price2_var = get_linear_model_column_var(price2) - self.assertAllEqual(cols_to_vars['bias'], [bias]) - self.assertAllEqual(cols_to_vars[price1], [price1_var]) - self.assertAllEqual(cols_to_vars[price2], [price2_var]) - - def test_fills_cols_to_vars_partitioned_variables(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2', shape=3) - with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [6., 7.]], - 'price2': [[3., 4., 5.], [8., 9., 10.]] - } - cols_to_vars = {} - with variable_scope.variable_scope( - 'linear', - partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)): - fc.linear_model(features, [price1, price2], cols_to_vars=cols_to_vars) - with _initialized_session(): - self.assertEqual([0.], cols_to_vars['bias'][0].eval()) - # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables. - self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval()) - self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval()) - # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and - # a [1, 1] Variable. 
- self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval()) - self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval()) - - def test_dense_collection(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price], weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - self.assertIn(bias, my_vars) - self.assertIn(price_var, my_vars) - - def test_sparse_collection(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - fc.linear_model( - features, [wire_cast], weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - self.assertIn(bias, my_vars) - self.assertIn(wire_cast_var, my_vars) - - def test_dense_trainable_default(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertIn(bias, trainable_vars) - self.assertIn(price_var, trainable_vars) - - def test_sparse_trainable_default(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - fc.linear_model(features, [wire_cast]) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - bias = get_linear_model_bias() - wire_cast_var = 
get_linear_model_column_var(wire_cast) - self.assertIn(bias, trainable_vars) - self.assertIn(wire_cast_var, trainable_vars) - - def test_dense_trainable_false(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - fc.linear_model(features, [price], trainable=False) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertEqual([], trainable_vars) - - def test_sparse_trainable_false(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - fc.linear_model(features, [wire_cast], trainable=False) - trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - self.assertEqual([], trainable_vars) - - def test_column_order(self): - price_a = fc_old.numeric_column('price_a') - price_b = fc_old.numeric_column('price_b') - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - 'wire_cast': - sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - } - fc.linear_model( - features, [price_a, wire_cast, price_b], - weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - self.assertIn('price_a', my_vars[0].name) - self.assertIn('price_b', my_vars[1].name) - self.assertIn('wire_cast', my_vars[2].name) - - with ops.Graph().as_default() as g: - features = { - 'price_a': [[1.]], - 'price_b': [[3.]], - 'wire_cast': - sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - } - fc.linear_model( - features, [wire_cast, price_b, price_a], - weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - self.assertIn('price_a', my_vars[0].name) - self.assertIn('price_b', 
my_vars[1].name) - self.assertIn('wire_cast', my_vars[2].name) - - def test_static_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': [[1.], [5.], [7.]], # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - with self.assertRaisesRegexp( - ValueError, - 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - fc.linear_model(features, [price1, price2]) - - def test_subset_of_static_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') - price3 = fc_old.numeric_column('price3') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 - 'price2': [[3.], [4.]], # batchsize = 2 - 'price3': [[3.], [4.], [5.]] # batchsize = 3 - } - with self.assertRaisesRegexp( - ValueError, - 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - fc.linear_model(features, [price1, price2, price3]) - - def test_runtime_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 - 'price2': [[3.], [4.]] # batchsize = 2 - } - predictions = fc.linear_model(features, [price1, price2]) - with _initialized_session() as sess: - with self.assertRaisesRegexp(errors.OpError, - 'must have the same size and shape'): - sess.run( - predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) - - def test_runtime_batch_size_matches(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = { - 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 - 'price2': 
array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 - } - predictions = fc.linear_model(features, [price1, price2]) - with _initialized_session() as sess: - sess.run( - predictions, - feed_dict={ - features['price1']: [[1.], [5.]], - features['price2']: [[1.], [5.]], - }) - - def test_with_numpy_input_fn(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc_old.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - - input_fn = numpy_io.numpy_input_fn( - x={ - 'price': np.array([-1., 2., 13., 104.]), - 'body-style': np.array(['sedan', 'hardtop', 'wagon', 'sedan']), - }, - batch_size=2, - shuffle=False) - features = input_fn() - net = fc.linear_model(features, [price_buckets, body_style]) - # self.assertEqual(1 + 3 + 5, net.shape[1]) - with _initialized_session() as sess: - coord = coordinator.Coordinator() - threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) - - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) - - self.assertAllClose([[10 - 1000 + 5.], [100 - 10 + 5.]], sess.run(net)) - - coord.request_stop() - coord.join(threads) - - def test_with_1d_sparse_tensor(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc_old.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - - # Provides 1-dim tensor and dense tensor. 
- features = { - 'price': constant_op.constant([-1., 12.,]), - 'body-style': sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), - } - self.assertEqual(1, features['price'].shape.ndims) - self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - - net = fc.linear_model(features, [price_buckets, body_style]) - with _initialized_session() as sess: - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) - - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) - - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net)) - - def test_with_1d_unknown_shape_sparse_tensor(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( - price, boundaries=[ - 0., - 10., - 100., - ]) - body_style = fc_old.categorical_column_with_vocabulary_list( - 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - country = fc_old.categorical_column_with_vocabulary_list( - 'country', vocabulary_list=['US', 'JP', 'CA']) + ((0.,), (0.,), (0.,), (0.,), (0.,)), crossed_var.eval()) + self.assertAllClose(((0.,), (0.,)), predictions.eval()) + sess.run(crossed_var.assign(((1.,), (2.,), (3.,), (4.,), (5.,)))) + # Expected ids after cross = (1, 0, 1, 3, 4, 2) + self.assertAllClose(((3.,), (14.,)), predictions.eval()) + sess.run(bias.assign((.1,))) + self.assertAllClose(((3.1,), (14.1,)), predictions.eval()) - # Provides 1-dim tensor and dense tensor. 
- features = { - 'price': array_ops.placeholder(dtypes.float32), - 'body-style': array_ops.sparse_placeholder(dtypes.string), - 'country': array_ops.placeholder(dtypes.string), - } - self.assertIsNone(features['price'].shape.ndims) - self.assertIsNone(features['body-style'].get_shape().ndims) + def test_linear_model_with_weights(self): - price_data = np.array([-1., 12.]) - body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)) - country_data = np.array(['US', 'CA']) + class _TestColumnWithWeights(fc.CategoricalColumn): + """Produces sparse IDs and sparse weights.""" - net = fc.linear_model(features, [price_buckets, body_style, country]) - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) - with _initialized_session() as sess: - sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) - sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) - sess.run(bias.assign([5.])) + @property + def name(self): + return 'test_column' - self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], - sess.run( - net, - feed_dict={ - features['price']: price_data, - features['body-style']: body_style_data, - features['country']: country_data - })) + @property + def parse_example_spec(self): + return { + self.name: parsing_ops.VarLenFeature(dtypes.int32), + '{}_weights'.format(self.name): parsing_ops.VarLenFeature( + dtypes.float32), + } - def test_with_rank_0_feature(self): - price = fc_old.numeric_column('price') - features = { - 'price': constant_op.constant(0), - } - self.assertEqual(0, features['price'].shape.ndims) + @property + def num_buckets(self): + return 5 - # Static rank 0 should fail - with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have rank 0'): - fc.linear_model(features, [price]) + def transform_feature(self, transformation_cache, state_manager): + return 
(transformation_cache.get(self.name, state_manager), + transformation_cache.get('{}_weights'.format(self.name), + state_manager)) - # Dynamic rank 0 should fail - features = { - 'price': array_ops.placeholder(dtypes.float32), - } - net = fc.linear_model(features, [price]) - self.assertEqual(1, net.shape[1]) - with _initialized_session() as sess: - with self.assertRaisesOpError('Feature .* cannot have rank 0'): - sess.run(net, feed_dict={features['price']: np.array(1)}) + def get_sparse_tensors(self, transformation_cache, state_manager): + """Populates both id_tensor and weight_tensor.""" + ids_and_weights = transformation_cache.get(self, state_manager) + return fc.CategoricalColumn.IdWeightPair( + id_tensor=ids_and_weights[0], weight_tensor=ids_and_weights[1]) - def test_multiple_linear_models(self): - price = fc_old.numeric_column('price') + t = _TestColumnWithWeights() + crossed = fc.crossed_column([t, 'c'], hash_bucket_size=5, hash_key=5) with ops.Graph().as_default(): - features1 = {'price': [[1.], [5.]]} - features2 = {'price': [[2.], [10.]]} - predictions1 = fc.linear_model(features1, [price]) - predictions2 = fc.linear_model(features2, [price]) - bias1 = get_linear_model_bias(name='linear_model') - bias2 = get_linear_model_bias(name='linear_model_1') - price_var1 = get_linear_model_column_var(price, name='linear_model') - price_var2 = get_linear_model_column_var(price, name='linear_model_1') - with _initialized_session() as sess: - self.assertAllClose([0.], bias1.eval()) - sess.run(price_var1.assign([[10.]])) - sess.run(bias1.assign([5.])) - self.assertAllClose([[15.], [55.]], predictions1.eval()) - self.assertAllClose([0.], bias2.eval()) - sess.run(price_var2.assign([[10.]])) - sess.run(bias2.assign([5.])) - self.assertAllClose([[25.], [105.]], predictions2.eval()) + with self.assertRaisesRegexp( + ValueError, + 'crossed_column does not support weight_tensor.*{}'.format(t.name)): + model = fc.LinearModel((crossed,)) + model({ + t.name: + 
sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[0, 1, 2], + dense_shape=(2, 2)), + '{}_weights'.format(t.name): + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=[1., 10., 2.], + dense_shape=(2, 2)), + 'c': + sparse_tensor.SparseTensor( + indices=((0, 0), (1, 0), (1, 1)), + values=['cA', 'cB', 'cC'], + dense_shape=(2, 2)), + }) -class _LinearModelTest(test.TestCase): +class LinearModelTest(test.TestCase): def test_raises_if_empty_feature_columns(self): with self.assertRaisesRegexp(ValueError, 'feature_columns must not be empty'): - get_keras_linear_model_predictions(features={}, feature_columns=[]) + fc.LinearModel(feature_columns=[]) def test_should_be_feature_column(self): - with self.assertRaisesRegexp(ValueError, 'must be a _FeatureColumn'): - get_keras_linear_model_predictions( - features={'a': [[0]]}, feature_columns='NotSupported') + with self.assertRaisesRegexp(ValueError, 'must be a FeatureColumn'): + fc.LinearModel(feature_columns='NotSupported') def test_should_be_dense_or_categorical_column(self): - class NotSupportedColumn(fc_old._FeatureColumn): + class NotSupportedColumn(fc.FeatureColumn): @property def name(self): return 'NotSupportedColumn' - def _transform_feature(self, cache): + def transform_feature(self, transformation_cache, state_manager): pass @property - def _parse_example_spec(self): + def parse_example_spec(self): pass with self.assertRaisesRegexp( - ValueError, 'must be either a _DenseColumn or _CategoricalColumn'): - get_keras_linear_model_predictions( - features={'a': [[0]]}, feature_columns=[NotSupportedColumn()]) + ValueError, 'must be either a DenseColumn or CategoricalColumn'): + fc.LinearModel(feature_columns=[NotSupportedColumn()]) def test_does_not_support_dict_columns(self): with self.assertRaisesRegexp( ValueError, 'Expected feature_columns to be iterable, found dict.'): - fc.linear_model( - features={'a': [[0]]}, - feature_columns={'a': fc_old.numeric_column('a')}) + 
fc.LinearModel(feature_columns={'a': fc.numeric_column('a')}) def test_raises_if_duplicate_name(self): with self.assertRaisesRegexp( ValueError, 'Duplicate feature column name found for columns'): - get_keras_linear_model_predictions( - features={'a': [[0]]}, - feature_columns=[ - fc_old.numeric_column('a'), - fc_old.numeric_column('a') - ]) + fc.LinearModel( + feature_columns=[fc.numeric_column('a'), + fc.numeric_column('a')]) def test_dense_bias(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - predictions = get_keras_linear_model_predictions(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price]) + predictions = model(features) + price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) sess.run(price_var.assign([[10.]])) @@ -2045,16 +1149,16 @@ class _LinearModelTest(test.TestCase): self.assertAllClose([[15.], [55.]], predictions.eval()) def test_sparse_bias(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default(): wire_tensor = sparse_tensor.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} - predictions = get_keras_linear_model_predictions(features, [wire_cast]) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) + model = fc.LinearModel([wire_cast]) + predictions = model(features) + wire_cast_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) self.assertAllClose([[0.], [0.], [0.], [0.]], wire_cast_var.eval()) @@ -2063,19 +1167,17 @@ class _LinearModelTest(test.TestCase): self.assertAllClose([[1005.], 
[10015.]], predictions.eval()) def test_dense_and_sparse_bias(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - price = fc_old.numeric_column('price') + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + price = fc.numeric_column('price') with ops.Graph().as_default(): wire_tensor = sparse_tensor.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor, 'price': [[1.], [5.]]} - predictions = get_keras_linear_model_predictions(features, - [wire_cast, price]) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([wire_cast, price]) + predictions = model(features) + price_var, wire_cast_var, bias = model.variables with _initialized_session() as sess: sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) @@ -2085,42 +1187,36 @@ class _LinearModelTest(test.TestCase): def test_dense_and_sparse_column(self): """When the column is both dense and sparse, uses sparse tensors.""" - class _DenseAndSparseColumn(fc_old._DenseColumn, fc_old._CategoricalColumn): + class _DenseAndSparseColumn(fc.DenseColumn, fc.CategoricalColumn): @property def name(self): return 'dense_and_sparse_column' @property - def _parse_example_spec(self): + def parse_example_spec(self): return {self.name: parsing_ops.VarLenFeature(self.dtype)} - def _transform_feature(self, inputs): - return inputs.get(self.name) + def transform_feature(self, transformation_cache, state_manager): + return transformation_cache.get(self.name, state_manager) @property - def _variable_shape(self): + def variable_shape(self): raise ValueError('Should not use this method.') - def _get_dense_tensor(self, - inputs, - weight_collections=None, - trainable=None): + def get_dense_tensor(self, transformation_cache, state_manager): 
raise ValueError('Should not use this method.') @property - def _num_buckets(self): + def num_buckets(self): return 4 - def _get_sparse_tensors(self, - inputs, - weight_collections=None, - trainable=None): + def get_sparse_tensors(self, transformation_cache, state_manager): sp_tensor = sparse_tensor.SparseTensor( indices=[[0, 0], [1, 0], [1, 1]], values=[2, 0, 3], dense_shape=[2, 2]) - return fc_old._CategoricalColumn.IdWeightPair(sp_tensor, None) + return fc.CategoricalColumn.IdWeightPair(sp_tensor, None) dense_and_sparse_column = _DenseAndSparseColumn() with ops.Graph().as_default(): @@ -2129,26 +1225,22 @@ class _LinearModelTest(test.TestCase): indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {dense_and_sparse_column.name: sp_tensor} - predictions = get_keras_linear_model_predictions( - features, [dense_and_sparse_column]) - bias = get_linear_model_bias() - dense_and_sparse_column_var = get_linear_model_column_var( - dense_and_sparse_column) + model = fc.LinearModel([dense_and_sparse_column]) + predictions = model(features) + dense_and_sparse_column_var, bias = model.variables with _initialized_session() as sess: - sess.run( - dense_and_sparse_column_var.assign([[10.], [100.], [1000.], - [10000.]])) + sess.run(dense_and_sparse_column_var.assign( + [[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [10015.]], predictions.eval()) def test_dense_multi_output(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} - predictions = get_keras_linear_model_predictions( - features, [price], units=3) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price], units=3) + predictions = model(features) + price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), bias.eval()) self.assertAllClose(np.zeros((1, 3)), 
price_var.eval()) @@ -2158,41 +1250,41 @@ class _LinearModelTest(test.TestCase): predictions.eval()) def test_sparse_multi_output(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default(): wire_tensor = sparse_tensor.SparseTensor( values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} - predictions = get_keras_linear_model_predictions( - features, [wire_cast], units=3) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) + model = fc.LinearModel([wire_cast], units=3) + predictions = model(features) + wire_cast_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), bias.eval()) self.assertAllClose(np.zeros((4, 3)), wire_cast_var.eval()) sess.run( - wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], - [1000., 1100., - 1200.], [10000., 11000., 12000.]])) + wire_cast_var.assign([[10., 11., 12.], [100., 110., 120.], [ + 1000., 1100., 1200. 
+ ], [10000., 11000., 12000.]])) sess.run(bias.assign([5., 6., 7.])) self.assertAllClose([[1005., 1106., 1207.], [10015., 11017., 12019.]], predictions.eval()) def test_dense_multi_dimension(self): - price = fc_old.numeric_column('price', shape=2) + price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} - predictions = get_keras_linear_model_predictions(features, [price]) - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price]) + predictions = model(features) + price_var, _ = model.variables with _initialized_session() as sess: self.assertAllClose([[0.], [0.]], price_var.eval()) sess.run(price_var.assign([[10.], [100.]])) self.assertAllClose([[210.], [650.]], predictions.eval()) def test_sparse_multi_rank(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default(): wire_tensor = array_ops.sparse_placeholder(dtypes.string) wire_value = sparse_tensor.SparseTensorValue( @@ -2200,8 +1292,9 @@ class _LinearModelTest(test.TestCase): indices=[[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 0, 1]], dense_shape=[2, 2, 2]) features = {'wire_cast': wire_tensor} - predictions = get_keras_linear_model_predictions(features, [wire_cast]) - wire_cast_var = get_linear_model_column_var(wire_cast) + model = fc.LinearModel([wire_cast]) + predictions = model(features) + wire_cast_var, _ = model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((4, 1)), wire_cast_var.eval()) self.assertAllClose( @@ -2213,30 +1306,49 @@ class _LinearModelTest(test.TestCase): predictions.eval(feed_dict={wire_tensor: wire_value})) def test_sparse_combiner(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default(): wire_tensor = sparse_tensor.SparseTensor( 
values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] indices=[[0, 0], [1, 0], [1, 1]], dense_shape=[2, 2]) features = {'wire_cast': wire_tensor} - predictions = get_keras_linear_model_predictions( - features, [wire_cast], sparse_combiner='mean') - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) + model = fc.LinearModel([wire_cast], sparse_combiner='mean') + predictions = model(features) + wire_cast_var, bias = model.variables with _initialized_session() as sess: sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(bias.assign([5.])) self.assertAllClose([[1005.], [5010.]], predictions.eval()) + def test_sparse_combiner_with_negative_weights(self): + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast_weights = fc.weighted_categorical_column(wire_cast, 'weights') + + with ops.Graph().as_default(): + wire_tensor = sparse_tensor.SparseTensor( + values=['omar', 'stringer', 'marlo'], # hashed to = [2, 0, 3] + indices=[[0, 0], [1, 0], [1, 1]], + dense_shape=[2, 2]) + features = { + 'wire_cast': wire_tensor, + 'weights': constant_op.constant([[1., 1., -1.0]]) + } + model = fc.LinearModel([wire_cast_weights], sparse_combiner='sum') + predictions = model(features) + wire_cast_var, bias = model.variables + with _initialized_session() as sess: + sess.run(wire_cast_var.assign([[10.], [100.], [1000.], [10000.]])) + sess.run(bias.assign([5.])) + self.assertAllClose([[1005.], [-9985.]], predictions.eval()) + def test_dense_multi_dimension_multi_output(self): - price = fc_old.numeric_column('price', shape=2) + price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1., 2.], [5., 6.]]} - predictions = get_keras_linear_model_predictions( - features, [price], units=3) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price], units=3) + predictions = model(features) + price_var, bias = 
model.variables with _initialized_session() as sess: self.assertAllClose(np.zeros((3,)), bias.eval()) self.assertAllClose(np.zeros((2, 3)), price_var.eval()) @@ -2246,21 +1358,22 @@ class _LinearModelTest(test.TestCase): predictions.eval()) def test_raises_if_shape_mismatch(self): - price = fc_old.numeric_column('price', shape=2) + price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): features = {'price': [[1.], [5.]]} with self.assertRaisesRegexp( Exception, r'Cannot reshape a tensor with 2 elements to shape \[2,2\]'): - get_keras_linear_model_predictions(features, [price]) + model = fc.LinearModel([price]) + model(features) def test_dense_reshaping(self): - price = fc_old.numeric_column('price', shape=[1, 2]) + price = fc.numeric_column('price', shape=[1, 2]) with ops.Graph().as_default(): features = {'price': [[[1., 2.]], [[5., 6.]]]} - predictions = get_keras_linear_model_predictions(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price]) + predictions = model(features) + price_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) self.assertAllClose([[0.], [0.]], price_var.eval()) @@ -2269,15 +1382,16 @@ class _LinearModelTest(test.TestCase): self.assertAllClose([[210.], [650.]], predictions.eval()) def test_dense_multi_column(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2') + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') with ops.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - predictions = get_keras_linear_model_predictions(features, - [price1, price2]) - bias = get_linear_model_bias() - price1_var = get_linear_model_column_var(price1) - price2_var = get_linear_model_column_var(price2) + features = { + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3.], [4.]] + } + model = 
fc.LinearModel([price1, price2]) + predictions = model(features) + price1_var, price2_var, bias = model.variables with _initialized_session() as sess: self.assertAllClose([0.], bias.eval()) self.assertAllClose([[0.], [0.]], price1_var.eval()) @@ -2288,118 +1402,55 @@ class _LinearModelTest(test.TestCase): sess.run(bias.assign([7.])) self.assertAllClose([[3217.], [4657.]], predictions.eval()) - def test_fills_cols_to_vars(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2') - with ops.Graph().as_default(): - features = {'price1': [[1., 2.], [5., 6.]], 'price2': [[3.], [4.]]} - cols_to_vars = {} - get_keras_linear_model_predictions( - features, [price1, price2], cols_to_vars=cols_to_vars) - bias = get_linear_model_bias() - price1_var = get_linear_model_column_var(price1) - price2_var = get_linear_model_column_var(price2) - self.assertAllEqual(cols_to_vars['bias'], [bias]) - self.assertAllEqual(cols_to_vars[price1], [price1_var]) - self.assertAllEqual(cols_to_vars[price2], [price2_var]) - - def test_fills_cols_to_vars_partitioned_variables(self): - price1 = fc_old.numeric_column('price1', shape=2) - price2 = fc_old.numeric_column('price2', shape=3) - with ops.Graph().as_default(): - features = { - 'price1': [[1., 2.], [6., 7.]], - 'price2': [[3., 4., 5.], [8., 9., 10.]] - } - cols_to_vars = {} - with variable_scope.variable_scope( - 'linear', - partitioner=partitioned_variables.fixed_size_partitioner(2, axis=0)): - get_keras_linear_model_predictions( - features, [price1, price2], cols_to_vars=cols_to_vars) - with _initialized_session(): - self.assertEqual([0.], cols_to_vars['bias'][0].eval()) - # Partitioning shards the [2, 1] price1 var into 2 [1, 1] Variables. - self.assertAllEqual([[0.]], cols_to_vars[price1][0].eval()) - self.assertAllEqual([[0.]], cols_to_vars[price1][1].eval()) - # Partitioning shards the [3, 1] price2 var into a [2, 1] Variable and - # a [1, 1] Variable. 
- self.assertAllEqual([[0.], [0.]], cols_to_vars[price2][0].eval()) - self.assertAllEqual([[0.]], cols_to_vars[price2][1].eval()) - - def test_dense_collection(self): - price = fc_old.numeric_column('price') - with ops.Graph().as_default() as g: - features = {'price': [[1.], [5.]]} - get_keras_linear_model_predictions( - features, [price], weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) - self.assertIn(bias, my_vars) - self.assertIn(price_var, my_vars) - - def test_sparse_collection(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: - wire_tensor = sparse_tensor.SparseTensor( - values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) - features = {'wire_cast': wire_tensor} - get_keras_linear_model_predictions( - features, [wire_cast], weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) - self.assertIn(bias, my_vars) - self.assertIn(wire_cast_var, my_vars) - def test_dense_trainable_default(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') with ops.Graph().as_default() as g: features = {'price': [[1.], [5.]]} - get_keras_linear_model_predictions(features, [price]) - bias = get_linear_model_bias() - price_var = get_linear_model_column_var(price) + model = fc.LinearModel([price]) + model(features) + price_var, bias = model.variables trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertIn(bias, trainable_vars) self.assertIn(price_var, trainable_vars) def test_sparse_trainable_default(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default() as g: wire_tensor = sparse_tensor.SparseTensor( values=['omar'], 
indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} - get_keras_linear_model_predictions(features, [wire_cast]) + model = fc.LinearModel([wire_cast]) + model(features) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - bias = get_linear_model_bias() - wire_cast_var = get_linear_model_column_var(wire_cast) + wire_cast_var, bias = model.variables self.assertIn(bias, trainable_vars) self.assertIn(wire_cast_var, trainable_vars) def test_dense_trainable_false(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') with ops.Graph().as_default() as g: features = {'price': [[1.], [5.]]} - get_keras_linear_model_predictions(features, [price], trainable=False) + model = fc.LinearModel([price], trainable=False) + model(features) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) def test_sparse_trainable_false(self): - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) with ops.Graph().as_default() as g: wire_tensor = sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) features = {'wire_cast': wire_tensor} - get_keras_linear_model_predictions(features, [wire_cast], trainable=False) + model = fc.LinearModel([wire_cast], trainable=False) + model(features) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) def test_column_order(self): - price_a = fc_old.numeric_column('price_a') - price_b = fc_old.numeric_column('price_b') - wire_cast = fc_old.categorical_column_with_hash_bucket('wire_cast', 4) - with ops.Graph().as_default() as g: + price_a = fc.numeric_column('price_a') + price_b = fc.numeric_column('price_b') + wire_cast = fc.categorical_column_with_hash_bucket('wire_cast', 4) + with ops.Graph().as_default(): features = { 'price_a': [[1.]], 'price_b': [[3.]], @@ -2407,15 
+1458,15 @@ class _LinearModelTest(test.TestCase): sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } - get_keras_linear_model_predictions( - features, [price_a, wire_cast, price_b], - weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') + model = fc.LinearModel([price_a, wire_cast, price_b]) + model(features) + + my_vars = model.variables self.assertIn('price_a', my_vars[0].name) self.assertIn('price_b', my_vars[1].name) self.assertIn('wire_cast', my_vars[2].name) - with ops.Graph().as_default() as g: + with ops.Graph().as_default(): features = { 'price_a': [[1.]], 'price_b': [[3.]], @@ -2423,17 +1474,45 @@ class _LinearModelTest(test.TestCase): sparse_tensor.SparseTensor( values=['omar'], indices=[[0, 0]], dense_shape=[1, 1]) } - get_keras_linear_model_predictions( - features, [wire_cast, price_b, price_a], - weight_collections=['my-vars']) - my_vars = g.get_collection('my-vars') + model = fc.LinearModel([wire_cast, price_b, price_a]) + model(features) + + my_vars = model.variables self.assertIn('price_a', my_vars[0].name) self.assertIn('price_b', my_vars[1].name) self.assertIn('wire_cast', my_vars[2].name) + def test_variable_names(self): + price1 = fc.numeric_column('price1') + dense_feature = fc.numeric_column('dense_feature') + dense_feature_bucketized = fc.bucketized_column( + dense_feature, boundaries=[0.]) + some_sparse_column = fc.categorical_column_with_hash_bucket( + 'sparse_feature', hash_bucket_size=5) + some_embedding_column = fc.embedding_column( + some_sparse_column, dimension=10) + all_cols = [price1, dense_feature_bucketized, some_embedding_column] + + with ops.Graph().as_default(): + model = fc.LinearModel(all_cols) + features = { + 'price1': [[3.], [4.]], + 'dense_feature': [[-1.], [4.]], + 'sparse_feature': [['a'], ['x']], + } + model(features) + variable_names = [var.name for var in model.variables] + self.assertItemsEqual([ + 'linear_model/dense_feature_bucketized/weights:0', + 
'linear_model/price1/weights:0', + 'linear_model/sparse_feature_embedding/embedding_weights:0', + 'linear_model/sparse_feature_embedding/weights:0', + 'linear_model/bias_weights:0', + ], variable_names) + def test_static_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') with ops.Graph().as_default(): features = { 'price1': [[1.], [5.], [7.]], # batchsize = 3 @@ -2442,12 +1521,13 @@ class _LinearModelTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - get_keras_linear_model_predictions(features, [price1, price2]) + model = fc.LinearModel([price1, price2]) + model(features) def test_subset_of_static_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') - price3 = fc_old.numeric_column('price3') + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') + price3 = fc.numeric_column('price3') with ops.Graph().as_default(): features = { 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 @@ -2457,18 +1537,19 @@ class _LinearModelTest(test.TestCase): with self.assertRaisesRegexp( ValueError, 'Batch size \(first dimension\) of each feature must be same.'): # pylint: disable=anomalous-backslash-in-string - get_keras_linear_model_predictions(features, [price1, price2, price3]) + model = fc.LinearModel([price1, price2, price3]) + model(features) def test_runtime_batch_size_mismatch(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') with ops.Graph().as_default(): features = { 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 3 'price2': [[3.], [4.]] # batchsize = 2 } - predictions = 
get_keras_linear_model_predictions(features, - [price1, price2]) + model = fc.LinearModel([price1, price2]) + predictions = model(features) with _initialized_session() as sess: with self.assertRaisesRegexp(errors.OpError, 'must have the same size and shape'): @@ -2476,15 +1557,15 @@ class _LinearModelTest(test.TestCase): predictions, feed_dict={features['price1']: [[1.], [5.], [7.]]}) def test_runtime_batch_size_matches(self): - price1 = fc_old.numeric_column('price1') - price2 = fc_old.numeric_column('price2') + price1 = fc.numeric_column('price1') + price2 = fc.numeric_column('price2') with ops.Graph().as_default(): features = { 'price1': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 'price2': array_ops.placeholder(dtype=dtypes.int64), # batchsize = 2 } - predictions = get_keras_linear_model_predictions(features, - [price1, price2]) + model = fc.LinearModel([price1, price2]) + predictions = model(features) with _initialized_session() as sess: sess.run( predictions, @@ -2494,14 +1575,14 @@ class _LinearModelTest(test.TestCase): }) def test_with_numpy_input_fn(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( price, boundaries=[ 0., 10., 100., ]) - body_style = fc_old.categorical_column_with_vocabulary_list( + body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) input_fn = numpy_io.numpy_input_fn( @@ -2512,16 +1593,14 @@ class _LinearModelTest(test.TestCase): batch_size=2, shuffle=False) features = input_fn() - net = get_keras_linear_model_predictions(features, - [price_buckets, body_style]) + model = fc.LinearModel([price_buckets, body_style]) + net = model(features) # self.assertEqual(1 + 3 + 5, net.shape[1]) with _initialized_session() as sess: coord = coordinator.Coordinator() threads = queue_runner_impl.start_queue_runners(sess, coord=coord) - bias = 
get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) + body_style_var, price_buckets_var, bias = model.variables sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) @@ -2533,38 +1612,31 @@ class _LinearModelTest(test.TestCase): coord.join(threads) def test_with_1d_sparse_tensor(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( price, boundaries=[ 0., 10., 100., ]) - body_style = fc_old.categorical_column_with_vocabulary_list( + body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) # Provides 1-dim tensor and dense tensor. features = { - 'price': - constant_op.constant([ - -1., - 12., - ]), - 'body-style': - sparse_tensor.SparseTensor( - indices=((0,), (1,)), - values=('sedan', 'hardtop'), - dense_shape=(2,)), + 'price': constant_op.constant([-1., 12.,]), + 'body-style': sparse_tensor.SparseTensor( + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)), } self.assertEqual(1, features['price'].shape.ndims) self.assertEqual(1, features['body-style'].dense_shape.get_shape()[0]) - net = get_keras_linear_model_predictions(features, - [price_buckets, body_style]) + model = fc.LinearModel([price_buckets, body_style]) + net = model(features) with _initialized_session() as sess: - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) + body_style_var, price_buckets_var, bias = model.variables sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) @@ -2573,16 +1645,16 @@ class _LinearModelTest(test.TestCase): 
self.assertAllClose([[10 - 1000 + 5.], [1000 - 10 + 5.]], sess.run(net)) def test_with_1d_unknown_shape_sparse_tensor(self): - price = fc_old.numeric_column('price') - price_buckets = fc_old.bucketized_column( + price = fc.numeric_column('price') + price_buckets = fc.bucketized_column( price, boundaries=[ 0., 10., 100., ]) - body_style = fc_old.categorical_column_with_vocabulary_list( + body_style = fc.categorical_column_with_vocabulary_list( 'body-style', vocabulary_list=['hardtop', 'wagon', 'sedan']) - country = fc_old.categorical_column_with_vocabulary_list( + country = fc.categorical_column_with_vocabulary_list( 'country', vocabulary_list=['US', 'JP', 'CA']) # Provides 1-dim tensor and dense tensor. @@ -2596,14 +1668,14 @@ class _LinearModelTest(test.TestCase): price_data = np.array([-1., 12.]) body_style_data = sparse_tensor.SparseTensorValue( - indices=((0,), (1,)), values=('sedan', 'hardtop'), dense_shape=(2,)) + indices=((0,), (1,)), + values=('sedan', 'hardtop'), + dense_shape=(2,)) country_data = np.array(['US', 'CA']) - net = get_keras_linear_model_predictions( - features, [price_buckets, body_style, country]) - bias = get_linear_model_bias() - price_buckets_var = get_linear_model_column_var(price_buckets) - body_style_var = get_linear_model_column_var(body_style) + model = fc.LinearModel([price_buckets, body_style, country]) + net = model(features) + body_style_var, _, price_buckets_var, bias = model.variables with _initialized_session() as sess: sess.run(price_buckets_var.assign([[10.], [100.], [1000.], [10000.]])) sess.run(body_style_var.assign([[-10.], [-100.], [-1000.]])) @@ -2619,7 +1691,7 @@ class _LinearModelTest(test.TestCase): })) def test_with_rank_0_feature(self): - price = fc_old.numeric_column('price') + price = fc.numeric_column('price') features = { 'price': constant_op.constant(0), } @@ -2627,18 +1699,41 @@ class _LinearModelTest(test.TestCase): # Static rank 0 should fail with self.assertRaisesRegexp(ValueError, 'Feature .* cannot have 
rank 0'): - get_keras_linear_model_predictions(features, [price]) + model = fc.LinearModel([price]) + model(features) # Dynamic rank 0 should fail features = { 'price': array_ops.placeholder(dtypes.float32), } - net = get_keras_linear_model_predictions(features, [price]) + model = fc.LinearModel([price]) + net = model(features) self.assertEqual(1, net.shape[1]) with _initialized_session() as sess: with self.assertRaisesOpError('Feature .* cannot have rank 0'): sess.run(net, feed_dict={features['price']: np.array(1)}) + def test_multiple_linear_models(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features1 = {'price': [[1.], [5.]]} + features2 = {'price': [[2.], [10.]]} + model1 = fc.LinearModel([price]) + model2 = fc.LinearModel([price]) + predictions1 = model1(features1) + predictions2 = model2(features2) + price_var1, bias1 = model1.variables + price_var2, bias2 = model2.variables + with _initialized_session() as sess: + self.assertAllClose([0.], bias1.eval()) + sess.run(price_var1.assign([[10.]])) + sess.run(bias1.assign([5.])) + self.assertAllClose([[15.], [55.]], predictions1.eval()) + self.assertAllClose([0.], bias2.eval()) + sess.run(price_var2.assign([[10.]])) + sess.run(bias2.assign([5.])) + self.assertAllClose([[25.], [105.]], predictions2.eval()) + class FeatureLayerTest(test.TestCase): @@ -3739,47 +2834,22 @@ class VocabularyFileCategoricalColumnTest(test.TestCase): id_weight_pair.id_tensor.eval()) def test_linear_model(self): - wire_column = fc_old.categorical_column_with_vocabulary_file( - key='wire', - vocabulary_file=self._wire_vocabulary_file_name, - vocabulary_size=self._wire_vocabulary_size, - num_oov_buckets=1) - self.assertEqual(4, wire_column._num_buckets) - with ops.Graph().as_default(): - predictions = fc.linear_model({ - wire_column.name: sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }, (wire_column,)) - bias = 
get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() - # 'marlo' -> 2: wire_var[2] = 3 - # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 - self.assertAllClose(((3.,), (5.,)), predictions.eval()) - - def test_keras_linear_model(self): - wire_column = fc_old.categorical_column_with_vocabulary_file( + wire_column = fc.categorical_column_with_vocabulary_file( key='wire', vocabulary_file=self._wire_vocabulary_file_name, vocabulary_size=self._wire_vocabulary_size, num_oov_buckets=1) - self.assertEqual(4, wire_column._num_buckets) + self.assertEqual(4, wire_column.num_buckets) with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((wire_column,)) + predictions = model({ wire_column.name: sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - }, (wire_column,)) - bias = get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) + }) + wire_var, bias = model.variables with _initialized_session(): self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) @@ -4131,54 +3201,30 @@ class VocabularyListCategoricalColumnTest(test.TestCase): }), None) self.assertIsNone(id_weight_pair.weight_tensor) with _initialized_session(): - _assert_sparse_tensor_value( - self, - sparse_tensor.SparseTensorValue( - indices=inputs.indices, - values=np.array((2, 60, 0, 4), dtype=np.int64), - dense_shape=inputs.dense_shape), - id_weight_pair.id_tensor.eval()) - - def test_linear_model(self): - wire_column = fc_old.categorical_column_with_vocabulary_list( - key='aaa', - vocabulary_list=('omar', 'stringer', 
'marlo'), - num_oov_buckets=1) - self.assertEqual(4, wire_column._num_buckets) - with ops.Graph().as_default(): - predictions = fc.linear_model({ - wire_column.name: sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=('marlo', 'skywalker', 'omar'), - dense_shape=(2, 2)) - }, (wire_column,)) - bias = get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - wire_var.assign(((1.,), (2.,), (3.,), (4.,))).eval() - # 'marlo' -> 2: wire_var[2] = 3 - # 'skywalker' -> 3, 'omar' -> 0: wire_var[3] + wire_var[0] = 4+1 = 5 - self.assertAllClose(((3.,), (5.,)), predictions.eval()) + _assert_sparse_tensor_value( + self, + sparse_tensor.SparseTensorValue( + indices=inputs.indices, + values=np.array((2, 60, 0, 4), dtype=np.int64), + dense_shape=inputs.dense_shape), + id_weight_pair.id_tensor.eval()) - def test_keras_linear_model(self): - wire_column = fc_old.categorical_column_with_vocabulary_list( + def test_linear_model(self): + wire_column = fc.categorical_column_with_vocabulary_list( key='aaa', vocabulary_list=('omar', 'stringer', 'marlo'), num_oov_buckets=1) - self.assertEqual(4, wire_column._num_buckets) + self.assertEqual(4, wire_column.num_buckets) with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((wire_column,)) + predictions = model({ wire_column.name: sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=('marlo', 'skywalker', 'omar'), dense_shape=(2, 2)) - }, (wire_column,)) - bias = get_linear_model_bias() - wire_var = get_linear_model_column_var(wire_column) + }) + wire_var, bias = model.variables with _initialized_session(): self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,), (0.,)), wire_var.eval()) @@ -4398,39 
+3444,18 @@ class IdentityCategoricalColumnTest(test.TestCase): })) def test_linear_model(self): - column = fc_old.categorical_column_with_identity(key='aaa', num_buckets=3) - self.assertEqual(3, column.num_buckets) - with ops.Graph().as_default(): - predictions = fc.linear_model({ - column.name: sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - weight_var.assign(((1.,), (2.,), (3.,))).eval() - # weight_var[0] = 1 - # weight_var[2] + weight_var[1] = 3+2 = 5 - self.assertAllClose(((1.,), (5.,)), predictions.eval()) - - def test_keras_linear_model(self): - column = fc_old.categorical_column_with_identity(key='aaa', num_buckets=3) + column = fc.categorical_column_with_identity(key='aaa', num_buckets=3) self.assertEqual(3, column.num_buckets) with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((column,)) + predictions = model({ column.name: sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(0, 2, 1), dense_shape=(2, 2)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) + }) + weight_var, bias = model.variables with _initialized_session(): self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) @@ -4656,27 +3681,8 @@ class IndicatorColumnTest(test.TestCase): self.assertAllEqual([[0., 1., 1.]], indicator_tensor.eval()) def test_linear_model(self): - animal = fc_old.indicator_column( - fc_old.categorical_column_with_identity('animal', num_buckets=4)) - with ops.Graph().as_default(): - features = { - 'animal': - sparse_tensor.SparseTensor( - 
indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) - } - - predictions = fc.linear_model(features, [animal]) - weight_var = get_linear_model_column_var(animal) - with _initialized_session(): - # All should be zero-initialized. - self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval()) - self.assertAllClose([[0.]], predictions.eval()) - weight_var.assign([[1.], [2.], [3.], [4.]]).eval() - self.assertAllClose([[2. + 3.]], predictions.eval()) - - def test_keras_linear_model(self): - animal = fc_old.indicator_column( - fc_old.categorical_column_with_identity('animal', num_buckets=4)) + animal = fc.indicator_column( + fc.categorical_column_with_identity('animal', num_buckets=4)) with ops.Graph().as_default(): features = { 'animal': @@ -4684,8 +3690,9 @@ class IndicatorColumnTest(test.TestCase): indices=[[0, 0], [0, 1]], values=[1, 2], dense_shape=[1, 2]) } - predictions = get_keras_linear_model_predictions(features, [animal]) - weight_var = get_linear_model_column_var(animal) + model = fc.LinearModel([animal]) + predictions = model(features) + weight_var, _ = model.variables with _initialized_session(): # All should be zero-initialized. self.assertAllClose([[0.], [0.], [0.], [0.]], weight_var.eval()) @@ -5137,17 +4144,16 @@ class EmbeddingColumnTest(test.TestCase): return zeros_embedding_values # Build columns. 
- categorical_column = fc_old.categorical_column_with_identity( + categorical_column = fc.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - embedding_column = fc_old.embedding_column( + embedding_column = fc.embedding_column( categorical_column, dimension=embedding_dimension, initializer=_initializer) with ops.Graph().as_default(): - predictions = fc.linear_model({ - categorical_column.name: sparse_input - }, (embedding_column,)) + model = fc.LinearModel((embedding_column,)) + predictions = model({categorical_column.name: sparse_input}) expected_var_names = ( 'linear_model/bias_weights:0', 'linear_model/aaa_embedding/weights:0', @@ -5189,82 +4195,6 @@ class EmbeddingColumnTest(test.TestCase): # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) - def test_keras_linear_model(self): - # Inputs. - batch_size = 4 - vocabulary_size = 3 - sparse_input = sparse_tensor.SparseTensorValue( - # example 0, ids [2] - # example 1, ids [0, 1] - # example 2, ids [] - # example 3, ids [1] - indices=((0, 0), (1, 0), (1, 4), (3, 0)), - values=(2, 0, 1, 1), - dense_shape=(batch_size, 5)) - - # Embedding variable. - embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - - def _initializer(shape, dtype, partition_info): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. 
- categorical_column = fc_old.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - embedding_column = fc_old.embedding_column( - categorical_column, - dimension=embedding_dimension, - initializer=_initializer) - - with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ - categorical_column.name: sparse_input - }, (embedding_column,)) - expected_var_names = ( - 'linear_model/bias_weights:0', - 'linear_model/aaa_embedding/weights:0', - 'linear_model/aaa_embedding/embedding_weights:0', - ) - self.assertItemsEqual( - expected_var_names, - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - trainable_vars = { - v.name: v - for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - } - self.assertItemsEqual(expected_var_names, trainable_vars.keys()) - bias = trainable_vars['linear_model/bias_weights:0'] - embedding_weights = trainable_vars[ - 'linear_model/aaa_embedding/embedding_weights:0'] - linear_weights = trainable_vars['linear_model/aaa_embedding/weights:0'] - with _initialized_session(): - # Predictions with all zero weights. - self.assertAllClose(np.zeros((1,)), bias.eval()) - self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), linear_weights.eval()) - self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) - - # Predictions with all non-zero weights. - embedding_weights.assign(( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) 
# id 2 - )).eval() - linear_weights.assign(((4.,), (6.,))).eval() - # example 0, ids [2], embedding[0] = [7, 11] - # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] - # example 2, ids [], embedding[2] = [0, 0] - # example 3, ids [1], embedding[3] = [3, 5] - # sum(embeddings * linear_weights) - # = [4*7 + 6*11, 4*2 + 6*3.5, 4*0 + 6*0, 4*3 + 6*5] = [94, 29, 0, 42] - self.assertAllClose(((94.,), (29.,), (0.,), (42.,)), predictions.eval()) - def test_feature_layer(self): # Inputs. vocabulary_size = 3 @@ -5765,27 +4695,31 @@ class SharedEmbeddingColumnTest(test.TestCase): return zeros_embedding_values # Build columns. - categorical_column_a = fc_old.categorical_column_with_identity( + categorical_column_a = fc.categorical_column_with_identity( key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc_old.categorical_column_with_identity( + categorical_column_b = fc.categorical_column_with_identity( key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_old.shared_embedding_columns( + embedding_column_a, embedding_column_b = fc.shared_embedding_columns_v2( [categorical_column_a, categorical_column_b], dimension=embedding_dimension, initializer=_initializer) with ops.Graph().as_default(): - predictions = fc.linear_model({ + model = fc.LinearModel( + (embedding_column_a, embedding_column_b), + shared_state_manager=fc.SharedEmbeddingStateManager()) + predictions = model({ categorical_column_a.name: input_a, - categorical_column_b.name: input_b, - }, (embedding_column_a, embedding_column_b)) + categorical_column_b.name: input_b + }) + # Linear weights do not follow the column name. But this is a rare use # case, and fixing it would add too much complexity to the code. 
expected_var_names = ( 'linear_model/bias_weights:0', - 'linear_model/aaa_bbb_shared_embedding/weights:0', - 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0', - 'linear_model/aaa_bbb_shared_embedding_1/weights:0', + 'linear_model/aaa_shared_embedding/weights:0', + 'shared_embedding_state_manager/aaa_bbb_shared_embedding:0', + 'linear_model/bbb_shared_embedding/weights:0', ) self.assertItemsEqual( expected_var_names, @@ -5797,102 +4731,11 @@ class SharedEmbeddingColumnTest(test.TestCase): self.assertItemsEqual(expected_var_names, trainable_vars.keys()) bias = trainable_vars['linear_model/bias_weights:0'] embedding_weights = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0'] - linear_weights_a = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding/weights:0'] - linear_weights_b = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding_1/weights:0'] - with _initialized_session(): - # Predictions with all zero weights. - self.assertAllClose(np.zeros((1,)), bias.eval()) - self.assertAllClose(zeros_embedding_values, embedding_weights.eval()) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), linear_weights_a.eval()) - self.assertAllClose( - np.zeros((embedding_dimension, 1)), linear_weights_b.eval()) - self.assertAllClose(np.zeros((batch_size, 1)), predictions.eval()) - - # Predictions with all non-zero weights. - embedding_weights.assign(( - (1., 2.), # id 0 - (3., 5.), # id 1 - (7., 11.) # id 2 - )).eval() - linear_weights_a.assign(((4.,), (6.,))).eval() - # example 0, ids [2], embedding[0] = [7, 11] - # example 1, ids [0, 1], embedding[1] = mean([1, 2] + [3, 5]) = [2, 3.5] - # sum(embeddings * linear_weights) - # = [4*7 + 6*11, 4*2 + 6*3.5] = [94, 29] - linear_weights_b.assign(((3.,), (5.,))).eval() - # example 0, ids [0], embedding[0] = [1, 2] - # example 1, ids [], embedding[1] = 0, 0] - # sum(embeddings * linear_weights) - # = [3*1 + 5*2, 3*0 +5*0] = [13, 0] - self.assertAllClose([[94. 
+ 13.], [29.]], predictions.eval()) - - def test_keras_linear_model(self): - # Inputs. - batch_size = 2 - vocabulary_size = 3 - # -1 values are ignored. - input_a = np.array([ - [2, -1, -1], # example 0, ids [2] - [0, 1, -1] - ]) # example 1, ids [0, 1] - input_b = np.array([ - [0, -1, -1], # example 0, ids [0] - [-1, -1, -1] - ]) # example 1, ids [] - - # Embedding variable. - embedding_dimension = 2 - embedding_shape = (vocabulary_size, embedding_dimension) - zeros_embedding_values = np.zeros(embedding_shape) - - def _initializer(shape, dtype, partition_info): - self.assertAllEqual(embedding_shape, shape) - self.assertEqual(dtypes.float32, dtype) - self.assertIsNone(partition_info) - return zeros_embedding_values - - # Build columns. - categorical_column_a = fc_old.categorical_column_with_identity( - key='aaa', num_buckets=vocabulary_size) - categorical_column_b = fc_old.categorical_column_with_identity( - key='bbb', num_buckets=vocabulary_size) - embedding_column_a, embedding_column_b = fc_old.shared_embedding_columns( - [categorical_column_a, categorical_column_b], - dimension=embedding_dimension, - initializer=_initializer) - - with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ - categorical_column_a.name: input_a, - categorical_column_b.name: input_b, - }, (embedding_column_a, embedding_column_b)) - # Linear weights do not follow the column name. But this is a rare use - # case, and fixing it would add too much complexity to the code. 
- expected_var_names = ( - 'linear_model/bias_weights:0', - 'linear_model/aaa_bbb_shared_embedding/weights:0', - 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0', - 'linear_model/aaa_bbb_shared_embedding_1/weights:0', - ) - self.assertItemsEqual( - expected_var_names, - [v.name for v in ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)]) - trainable_vars = { - v.name: v - for v in ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) - } - self.assertItemsEqual(expected_var_names, trainable_vars.keys()) - bias = trainable_vars['linear_model/bias_weights:0'] - embedding_weights = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding/embedding_weights:0'] + 'shared_embedding_state_manager/aaa_bbb_shared_embedding:0'] linear_weights_a = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding/weights:0'] + 'linear_model/aaa_shared_embedding/weights:0'] linear_weights_b = trainable_vars[ - 'linear_model/aaa_bbb_shared_embedding_1/weights:0'] + 'linear_model/bbb_shared_embedding/weights:0'] with _initialized_session(): # Predictions with all zero weights. 
self.assertAllClose(np.zeros((1,)), bias.eval()) @@ -6291,13 +5134,14 @@ class WeightedCategoricalColumnTest(test.TestCase): dense_shape=(2, 2)), weight_tensor.eval()) - def test_keras_linear_model(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( + def test_linear_model(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((column,)) + predictions = model({ 'ids': sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), @@ -6308,9 +5152,8 @@ class WeightedCategoricalColumnTest(test.TestCase): indices=((0, 0), (1, 0), (1, 1)), values=(.5, 1., .1), dense_shape=(2, 2)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) + }) + weight_var, bias = model.variables with _initialized_session(): self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) @@ -6321,15 +5164,16 @@ class WeightedCategoricalColumnTest(test.TestCase): # = 3*1 + 2*.1 = 3+.2 = 3.2 self.assertAllClose(((.5,), (3.2,)), predictions.eval()) - def test_keras_linear_model_mismatched_shape(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( + def test_linear_model_mismatched_shape(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with ops.Graph().as_default(): - with self.assertRaisesRegexp(ValueError, - r'Dimensions.*are not compatible'): - get_keras_linear_model_predictions({ + with self.assertRaisesRegexp( + ValueError, r'Dimensions.*are not compatible'): + model = fc.LinearModel((column,)) + model({ 'ids': sparse_tensor.SparseTensorValue( 
indices=((0, 0), (1, 0), (1, 1)), @@ -6340,122 +5184,23 @@ class WeightedCategoricalColumnTest(test.TestCase): indices=((0, 0), (0, 1), (1, 0), (1, 1)), values=(.5, 11., 1., .1), dense_shape=(2, 2)) - }, (column,)) - - def test_keras_linear_model_mismatched_dense_values(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions( - { - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': ((.5,), (1.,)) - }, (column,), - sparse_combiner='mean') - # Disabling the constant folding optimizer here since it changes the - # error message differently on CPU and GPU. - config = config_pb2.ConfigProto() - config.graph_options.rewrite_options.constant_folding = ( - rewriter_config_pb2.RewriterConfig.OFF) - with _initialized_session(config): - with self.assertRaisesRegexp(errors.OpError, 'Incompatible shapes'): - predictions.eval() + }) - def test_keras_linear_model_mismatched_dense_shape(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( + def test_linear_model_mismatched_dense_values(self): + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with ops.Graph().as_default(): - predictions = get_keras_linear_model_predictions({ + model = fc.LinearModel((column,), sparse_combiner='mean') + predictions = model({ 'ids': sparse_tensor.SparseTensorValue( indices=((0, 0), (1, 0), (1, 1)), values=(0, 2, 1), dense_shape=(2, 2)), - 'values': ((.5,), (1.,), (.1,)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - 
self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - weight_var.assign(((1.,), (2.,), (3.,))).eval() - # weight_var[0] * weights[0, 0] = 1 * .5 = .5 - # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] - # = 3*1 + 2*.1 = 3+.2 = 3.2 - self.assertAllClose(((.5,), (3.2,)), predictions.eval()) - - def test_linear_model(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - predictions = fc.linear_model({ - 'ids': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(.5, 1., .1), - dense_shape=(2, 2)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) - with _initialized_session(): - self.assertAllClose((0.,), bias.eval()) - self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) - self.assertAllClose(((0.,), (0.,)), predictions.eval()) - weight_var.assign(((1.,), (2.,), (3.,))).eval() - # weight_var[0] * weights[0, 0] = 1 * .5 = .5 - # weight_var[2] * weights[1, 0] + weight_var[1] * weights[1, 1] - # = 3*1 + 2*.1 = 3+.2 = 3.2 - self.assertAllClose(((.5,), (3.2,)), predictions.eval()) - - def test_linear_model_mismatched_shape(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - with self.assertRaisesRegexp( - ValueError, r'Dimensions.*are not compatible'): - fc.linear_model({ - 'ids': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': sparse_tensor.SparseTensorValue( - indices=((0, 0), (0, 1), (1, 0), (1, 
1)), - values=(.5, 11., 1., .1), - dense_shape=(2, 2)) - }, (column,)) - - def test_linear_model_mismatched_dense_values(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( - key='ids', num_buckets=3), - weight_feature_key='values') - with ops.Graph().as_default(): - predictions = fc.linear_model( - { - 'ids': - sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), - 'values': ((.5,), (1.,)) - }, (column,), - sparse_combiner='mean') + 'values': ((.5,), (1.,)) + }) # Disabling the constant folding optimizer here since it changes the # error message differently on CPU and GPU. config = config_pb2.ConfigProto() @@ -6466,20 +5211,21 @@ class WeightedCategoricalColumnTest(test.TestCase): predictions.eval() def test_linear_model_mismatched_dense_shape(self): - column = fc_old.weighted_categorical_column( - categorical_column=fc_old.categorical_column_with_identity( + column = fc.weighted_categorical_column( + categorical_column=fc.categorical_column_with_identity( key='ids', num_buckets=3), weight_feature_key='values') with ops.Graph().as_default(): - predictions = fc.linear_model({ - 'ids': sparse_tensor.SparseTensorValue( - indices=((0, 0), (1, 0), (1, 1)), - values=(0, 2, 1), - dense_shape=(2, 2)), + model = fc.LinearModel((column,)) + predictions = model({ + 'ids': + sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(0, 2, 1), + dense_shape=(2, 2)), 'values': ((.5,), (1.,), (.1,)) - }, (column,)) - bias = get_linear_model_bias() - weight_var = get_linear_model_column_var(column) + }) + weight_var, bias = model.variables with _initialized_session(): self.assertAllClose((0.,), bias.eval()) self.assertAllClose(((0.,), (0.,), (0.,)), weight_var.eval()) -- GitLab From 97cba0b88cb3ce6a3f3cc66a8c4fd414bd3ac1a8 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 27 Sep 2018 20:59:37 -0700 Subject: [PATCH 124/570] Allowing 
source_device to be set to /cpu:0 for multi device iterator in distribution strategies. That is always the appropriate option. In the existing code, we would set it to a partially specified "worker" name that was ambiguous and end up on the GPU. PiperOrigin-RevId: 214882658 --- tensorflow/contrib/distribute/python/mirrored_strategy.py | 3 +-- tensorflow/contrib/distribute/python/values.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 945f450387..504f45a695 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -482,8 +482,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): return values.PerDeviceDataset( self._call_dataset_fn(dataset_fn), self._devices, - self._prefetch_on_device, - source_device=device_util.resolve("/device:CPU:0")) + self._prefetch_on_device) # TODO(priyag): Deal with OutOfRange errors once b/111349762 is fixed. def _run_steps_on_dataset(self, fn, iterator, iterations, diff --git a/tensorflow/contrib/distribute/python/values.py b/tensorflow/contrib/distribute/python/values.py index a0cd029f51..cce41e7717 100644 --- a/tensorflow/contrib/distribute/python/values.py +++ b/tensorflow/contrib/distribute/python/values.py @@ -708,10 +708,8 @@ class PerDeviceDataset(object): dataset, devices, prefetch_on_device=None, - source_device="/cpu:0", ): self._devices = devices - self._source_device = source_device if source_device is not None else "/cpu:0" # Default to using prefetching in graph mode, unless specified. # TODO(rohanj): Enable prefetching in eager mode. 
@@ -750,7 +748,7 @@ class PerDeviceDataset(object): "Please use `make_one_shot_iterator` instead.") if self._prefetch_on_device: dataset_iterator = multi_device_iterator_ops.MultiDeviceIterator( - self._dataset, self._devices, source_device=self._source_device) + self._dataset, self._devices) else: dataset_iterator = self._dataset.make_initializable_iterator() return PerDeviceDataIterator( @@ -838,7 +836,6 @@ class MultiWorkerDataset(object): self._datasets[worker] = PerDeviceDataset( worker_input, worker_devices, - source_device=worker, prefetch_on_device=prefetch_on_device) def make_one_shot_iterator(self): -- GitLab From 7fd14feb9cbc690b362633639b27393576472c79 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 27 Sep 2018 21:11:42 -0700 Subject: [PATCH 125/570] Kernel signature reworking, remove Dims from tensor functions. PiperOrigin-RevId: 214883775 --- .../contrib/lite/kernels/internal/tensor.h | 4 --- .../lite/kernels/internal/tensor_ctypes.h | 29 --------------- .../lite/kernels/internal/tensor_test.cc | 36 ++++++++++--------- 3 files changed, 20 insertions(+), 49 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/tensor.h b/tensorflow/contrib/lite/kernels/internal/tensor.h index 765c3a03ef..689cea03e7 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor.h @@ -37,10 +37,6 @@ inline const std::complex* GetTensorData(const TfLiteTensor* tensor) { : nullptr; } -inline Dims<4> GetTensorDims(std::vector data) { - return GetTensorDims(data.data(), data.size()); -} - inline RuntimeShape GetTensorShape(std::vector data) { return RuntimeShape(data.size(), data.data()); } diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h index 5e688ce452..9f5b33d217 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h +++ b/tensorflow/contrib/lite/kernels/internal/tensor_ctypes.h @@ -86,35 
+86,6 @@ inline const bool* GetTensorData(const TfLiteTensor* tensor) { return tensor != nullptr ? tensor->data.b : nullptr; } -// TODO(ahentz): the implementations in kernels/internal/ take a Dims<4> object -// even if the original tensors were not 4D. We should consider rewriting them -// to take a more generic 'shape' object. -inline Dims<4> GetTensorDims(const int data[], const int size) { - Dims<4> d; - for (int i = 0; i < 4; ++i) { - int src = size - i - 1; - if (src >= 0) { - d.sizes[i] = data[src]; - } else { - d.sizes[i] = 1; - } - } - d.strides[0] = 1; - for (int i = 1; i < 4; i++) { - d.strides[i] = d.strides[i - 1] * d.sizes[i - 1]; - } - return d; -} - -inline Dims<4> GetTensorDims(const TfLiteTensor* tensor) { - if (tensor == nullptr) { - return Dims<4>(); - } - - auto* dims = tensor->dims; - return GetTensorDims(dims->data, dims->size); -} - inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) { if (tensor == nullptr) { return RuntimeShape(); diff --git a/tensorflow/contrib/lite/kernels/internal/tensor_test.cc b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc index bf2068d320..2ed73ba82d 100644 --- a/tensorflow/contrib/lite/kernels/internal/tensor_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/tensor_test.cc @@ -21,28 +21,32 @@ namespace { using ::testing::ElementsAre; -TEST(TensorTest, GetTensorDims4D) { - Dims<4> d = GetTensorDims({2, 3, 4, 5}); - EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 2)); - EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60)); +TEST(TensorTest, GetTensorShape4D) { + RuntimeShape d = GetTensorShape({2, 3, 4, 5}); + EXPECT_THAT( + std::vector(d.DimsData(), d.DimsData() + d.DimensionsCount()), + ElementsAre(2, 3, 4, 5)); } -TEST(TensorTest, GetTensorDims3D) { - Dims<4> d = GetTensorDims({3, 4, 5}); - EXPECT_THAT(d.sizes, ElementsAre(5, 4, 3, 1)); - EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 60)); +TEST(TensorTest, GetTensorShape3D) { + RuntimeShape d = GetTensorShape({3, 4, 5}); + EXPECT_THAT( + 
std::vector(d.DimsData(), d.DimsData() + d.DimensionsCount()), + ElementsAre(3, 4, 5)); } -TEST(TensorTest, GetTensorDims2D) { - Dims<4> d = GetTensorDims({4, 5}); - EXPECT_THAT(d.sizes, ElementsAre(5, 4, 1, 1)); - EXPECT_THAT(d.strides, ElementsAre(1, 5, 20, 20)); +TEST(TensorTest, GetTensorShape2D) { + RuntimeShape d = GetTensorShape({4, 5}); + EXPECT_THAT( + std::vector(d.DimsData(), d.DimsData() + d.DimensionsCount()), + ElementsAre(4, 5)); } -TEST(TensorTest, GetTensorDims1D) { - Dims<4> d = GetTensorDims({5}); - EXPECT_THAT(d.sizes, ElementsAre(5, 1, 1, 1)); - EXPECT_THAT(d.strides, ElementsAre(1, 5, 5, 5)); +TEST(TensorTest, GetTensorShape1D) { + RuntimeShape d = GetTensorShape({5}); + EXPECT_THAT( + std::vector(d.DimsData(), d.DimsData() + d.DimensionsCount()), + ElementsAre(5)); } } // namespace -- GitLab From efe17306442aa91192df953ae537d3f9b824dae6 Mon Sep 17 00:00:00 2001 From: IMBurbank Date: Thu, 27 Sep 2018 22:21:47 -0600 Subject: [PATCH 126/570] Updated python3 tf_inspect.getargspec calls to use getfullargspec and repackage the return values into the getargspec struct. 
--- .../python/losses/python/tuple_losses_impl.py | 2 +- .../labeled_tensor/python/ops/_typecheck.py | 2 +- .../layers/python/layers/rev_block_lib.py | 3 +- .../python/learn/estimators/estimator.py | 4 +- .../learn/python/learn/estimators/head.py | 2 +- .../learn/python/learn/experiment_test.py | 2 +- .../learn/python/learn/export_strategy.py | 2 +- .../contrib/learn/python/learn/metric_spec.py | 2 +- .../contrib/learn/python/learn/monitors.py | 2 +- .../contrib/tpu/python/tpu/tpu_function.py | 2 +- tensorflow/python/framework/errors_impl.py | 2 +- tensorflow/python/framework/function.py | 6 +- tensorflow/python/keras/backend_test.py | 2 +- tensorflow/python/keras/testing_utils.py | 2 +- .../kernel_tests/variable_scope_test.py | 4 +- tensorflow/python/ops/variable_scope.py | 4 +- tensorflow/python/util/tf_contextlib_test.py | 2 +- tensorflow/python/util/tf_inspect.py | 89 ++++++++++++------- .../api/lib/python_object_to_proto_visitor.py | 4 +- 19 files changed, 79 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py index 00a83e5e55..221c70c38b 100644 --- a/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py +++ b/tensorflow/contrib/gan/python/losses/python/tuple_losses_impl.py @@ -101,7 +101,7 @@ def _args_to_gan_model(loss_fn): """ # Match arguments in `loss_fn` to elements of `namedtuple`. # TODO(joelshor): Properly handle `varargs` and `keywords`. 
- argspec = tf_inspect.getfullargspec(loss_fn) + argspec = tf_inspect.getargspec(loss_fn) defaults = argspec.defaults or [] required_args = set(argspec.args[:-len(defaults)]) diff --git a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py index 0e23039847..80fa17ec1f 100644 --- a/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py +++ b/tensorflow/contrib/labeled_tensor/python/ops/_typecheck.py @@ -230,7 +230,7 @@ def accepts(*types): def check_accepts(f): """Check the types.""" - spec = tf_inspect.getfullargspec(f) + spec = tf_inspect.getargspec(f) num_function_arguments = len(spec.args) if len(types) != num_function_arguments: diff --git a/tensorflow/contrib/layers/python/layers/rev_block_lib.py b/tensorflow/contrib/layers/python/layers/rev_block_lib.py index 55979cc391..06da32072f 100644 --- a/tensorflow/contrib/layers/python/layers/rev_block_lib.py +++ b/tensorflow/contrib/layers/python/layers/rev_block_lib.py @@ -576,8 +576,7 @@ def _recomputing_grad_fn(compute_fn, def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False): """See recompute_grad.""" - has_is_recompute_kwarg = ( - "is_recomputing" in tf_inspect.getfullargspec(fn).args) + has_is_recompute_kwarg = "is_recomputing" in tf_inspect.getargspec(fn).args for arg in args: if not isinstance(arg, framework_ops.Tensor): raise ValueError("All inputs to function must be Tensors") diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index b88923bca2..c1de42782e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -199,11 +199,11 @@ def _model_fn_args(fn): if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'): # Handle functools.partial and similar objects. 
return tuple([ - arg for arg in tf_inspect.getfullargspec(fn.func).args[len(fn.args):] + arg for arg in tf_inspect.getargspec(fn.func).args[len(fn.args):] if arg not in set(fn.keywords.keys()) ]) # Handle function. - return tuple(tf_inspect.getfullargspec(fn).args) + return tuple(tf_inspect.getargspec(fn).args) def _get_replica_device_setter(config): diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 63dd08316b..c6f79e00d5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -1861,7 +1861,7 @@ def _get_arguments(func): _, func = tf_decorator.unwrap(func) if hasattr(func, "__code__"): # Regular function. - return tf_inspect.getfullargspec(func) + return tf_inspect.getargspec(func) elif hasattr(func, "func"): # Partial function. return _get_arguments(func.func) diff --git a/tensorflow/contrib/learn/python/learn/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py index 6926696fb6..fb16c94c29 100644 --- a/tensorflow/contrib/learn/python/learn/experiment_test.py +++ b/tensorflow/contrib/learn/python/learn/experiment_test.py @@ -126,7 +126,7 @@ class TestBaseEstimator(object): def _check_method_supports_args(method, kwargs): """Checks that the given method supports the given args.""" - supported_args = tuple(tf_inspect.getfullargspec(method).args) + supported_args = tuple(tf_inspect.getargspec(method).args) for kwarg in kwargs: if kwarg not in supported_args: raise ValueError( diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 0d6e0cdc18..075cab536e 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -96,7 +96,7 @@ class ExportStrategy( """ # don't break existing export_fns that don't accept checkpoint_path and # 
eval_result - export_fn_args = tf_inspect.getfullargspec(self.export_fn).args + export_fn_args = tf_inspect.getargspec(self.export_fn).args kwargs = {} if 'checkpoint_path' in export_fn_args: kwargs['checkpoint_path'] = checkpoint_path diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 604d6d46b4..97220365d5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -51,7 +51,7 @@ def _args(fn): return tuple( [arg for arg in _args(fn.func) if arg not in set(fn.keywords.keys())]) # Handle function. - return tuple(tf_inspect.getfullargspec(fn).args) + return tuple(tf_inspect.getargspec(fn).args) _CANONICAL_LABELS_ARG = 'labels' diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 5f61e0264f..3d691d4340 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -1303,7 +1303,7 @@ class RunHookAdapterForMonitors(session_run_hook.SessionRunHook): def end(self, session): self._last_step = None for m in self._monitors: - if "session" in tf_inspect.getfullargspec(m.end).args: + if "session" in tf_inspect.getargspec(m.end).args: m.end(session=session) else: m.end() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_function.py b/tensorflow/contrib/tpu/python/tpu/tpu_function.py index 9c4bd1c4d1..0c7a38dbbb 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_function.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_function.py @@ -80,7 +80,7 @@ def check_function_argument_count(func, input_arity, infeed_queue): number_of_arguments_needed = input_arity if infeed_queue is not None: number_of_arguments_needed += infeed_queue.number_of_tuple_elements - arg_spec = tf_inspect.getfullargspec(func) + arg_spec = tf_inspect.getargspec(func) number_of_args = len(arg_spec.args) if arg_spec.defaults is None: 
number_of_defaults = 0 diff --git a/tensorflow/python/framework/errors_impl.py b/tensorflow/python/framework/errors_impl.py index c373e75a74..5af71f2cfb 100644 --- a/tensorflow/python/framework/errors_impl.py +++ b/tensorflow/python/framework/errors_impl.py @@ -55,7 +55,7 @@ class OpError(Exception): def __reduce__(self): # Allow the subclasses to accept less arguments in their __init__. - init_argspec = tf_inspect.getfullargspec(self.__class__.__init__) + init_argspec = tf_inspect.getargspec(self.__class__.__init__) args = tuple(getattr(self, arg) for arg in init_argspec.args[1:]) return self.__class__, args diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 3db6f683c9..225208944e 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -132,9 +132,9 @@ class Defun(object): raise ValueError("func %s must be callable" % func) # Func should not use kwargs and defaults. - argspec = tf_inspect.getfullargspec(func) - if argspec.varkw or argspec.defaults: - raise ValueError("Functions with argument defaults or varkw " + argspec = tf_inspect.getargspec(func) + if argspec.keywords or argspec.defaults: + raise ValueError("Functions with argument defaults or keywords " "arguments are not supported.") # Computes how many arguments 'func' has. 
diff --git a/tensorflow/python/keras/backend_test.py b/tensorflow/python/keras/backend_test.py index 31191d0d35..ab71589940 100644 --- a/tensorflow/python/keras/backend_test.py +++ b/tensorflow/python/keras/backend_test.py @@ -452,7 +452,7 @@ class BackendLinearAlgebraTest(test.TestCase): compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), keras_kwargs={'axis': -1}, np_kwargs={'axis': -1}) - if 'keepdims' in tf_inspect.getfullargspec(keras_op).args: + if 'keepdims' in tf_inspect.getargspec(keras_op).args: compare_single_input_op_to_numpy(keras_op, np_op, input_shape=(4, 7, 5), keras_kwargs={'axis': 1, diff --git a/tensorflow/python/keras/testing_utils.py b/tensorflow/python/keras/testing_utils.py index 1afaba5653..501b50ba5f 100644 --- a/tensorflow/python/keras/testing_utils.py +++ b/tensorflow/python/keras/testing_utils.py @@ -102,7 +102,7 @@ def layer_test(layer_cls, kwargs=None, input_shape=None, input_dtype=None, layer.set_weights(weights) # test and instantiation from weights - if 'weights' in tf_inspect.getfullargspec(layer_cls.__init__): + if 'weights' in tf_inspect.getargspec(layer_cls.__init__): kwargs['weights'] = weights layer = layer_cls(**kwargs) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 1d0b72b17a..401e1ae102 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -998,8 +998,8 @@ class VariableScopeTest(test.TestCase): def testSignatureGetVarVsGetLocalVar(self): """get_{local,}variable() must take the same list of args.""" - arg_names = tf_inspect.getfullargspec(variable_scope.get_variable)[0] - local_arg_names = tf_inspect.getfullargspec( + arg_names = tf_inspect.getargspec(variable_scope.get_variable)[0] + local_arg_names = tf_inspect.getargspec( variable_scope.get_local_variable)[0] self.assertEqual(arg_names, local_arg_names) diff --git 
a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 3cc1eb916d..a43676cd70 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -892,14 +892,14 @@ class _VariableStore(object): if shape and shape.is_fully_defined(): init_val = lambda: initializer( # pylint: disable=g-long-lambda shape.as_list(), dtype=dtype, partition_info=partition_info) - elif not tf_inspect.getfullargspec(initializer).args: + elif not tf_inspect.getargspec(initializer).args: init_val = initializer else: raise ValueError("You can only pass an initializer function that " "expects no arguments to its callable when the " "shape is not fully defined. The given initializer " "function expects the following args %s" % - tf_inspect.getfullargspec(initializer).args) + tf_inspect.getargspec(initializer).args) variable_dtype = dtype.base_dtype # Create the variable. diff --git a/tensorflow/python/util/tf_contextlib_test.py b/tensorflow/python/util/tf_contextlib_test.py index 1e921b5ea3..4a5bf388a6 100644 --- a/tensorflow/python/util/tf_contextlib_test.py +++ b/tensorflow/python/util/tf_contextlib_test.py @@ -83,7 +83,7 @@ class TfContextlibTest(test.TestCase): self.assertFalse(isinstance(target, tf_decorator.TFDecorator)) def testGetArgSpecReturnsWrappedArgSpec(self): - argspec = tf_inspect.getfullargspec(test_params_and_defaults) + argspec = tf_inspect.getargspec(test_params_and_defaults) self.assertEqual(['a', 'b', 'c', 'd'], argspec.args) self.assertEqual((2, True, 'hello'), argspec.defaults) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index 234850ac3f..3cd6c515b9 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -36,6 +36,53 @@ else: 'annotations' ]) +if hasattr(_inspect, 'getfullargspec'): + _getfullargspec = _inspect.getfullargspec # pylint: disable=invalid-name + + def _getargspec(target): + """A python3 version of 
getargspec. + + Calls `getfullargspec` and assigns args, varargs, + varkw, and defaults to a python 2/3 compatible `ArgSpec`. + + The parameter name 'varkw' is changed to 'keywords' to fit the + `ArgSpec` struct. + + Args: + target: the target object to inspect. + Returns: + An ArgSpec with args, varargs, keywords, and defaults parameters + from FullArgSpec. + """ + fullargspecs = getfullargspec(target) + argspecs = ArgSpec( + args=fullargspecs.args, + varargs=fullargspecs.varargs, + keywords=fullargspecs.varkw, + defaults=fullargspecs.defaults) + return argspecs +else: + _getargspec = _inspect.getargspec + + def _getfullargspec(target): + """A python2 version of getfullargspec. + + Args: + target: the target object to inspect. + Returns: + A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations. + """ + argspecs = getargspec(target) + fullargspecs = FullArgSpec( + args=argspecs.args, + varargs=argspecs.varargs, + varkw=argspecs.keywords, + defaults=argspecs.defaults, + kwonlyargs=[], + kwonlydefaults=None, + annotations={}) + return fullargspecs + def currentframe(): """TFDecorator-aware replacement for inspect.currentframe.""" @@ -45,10 +92,8 @@ def currentframe(): def getargspec(obj): """TFDecorator-aware replacement for `inspect.getargspec`. - This should not be called from other modules. It is deprecated in python3. - - Use `getfullargspec`. It is a TFDecorator-aware replacement for - `inspect.getfullargspec` compatible with both python2 and python3. + Note: `getfullargspec` is recommended as the python 2/3 compatible + replacement for this function. Args: obj: A function, partial function, or callable object, possibly @@ -56,8 +101,8 @@ def getargspec(obj): Returns: The `ArgSpec` that describes the signature of the outermost decorator that - changes the callable's signature. If the callable is not decorated, - `inspect.getargspec()` will be called directly on the object. 
+ changes the callable's signature, or the `ArgSpec` that describes + the object if not decorated. Raises: ValueError: When callable's signature can not be expressed with @@ -77,24 +122,24 @@ def getargspec(obj): try: # Python3 will handle most callables here (not partial). - return _inspect.getargspec(target) + return _getargspec(target) except TypeError: pass if isinstance(target, type): try: - return _inspect.getargspec(target.__init__) + return _getargspec(target.__init__) except TypeError: pass try: - return _inspect.getargspec(target.__new__) + return _getargspec(target.__new__) except TypeError: pass # The `type(target)` ensures that if a class is received we don't return # the signature of it's __call__ method. - return _inspect.getargspec(type(target).__call__) + return _getargspec(type(target).__call__) def _get_argspec_for_partial(obj): @@ -177,30 +222,6 @@ def _get_argspec_for_partial(obj): return ArgSpec(args, varargs, keywords, tuple(all_defaults[first_default:])) -if hasattr(_inspect, 'getfullargspec'): - _getfullargspec = _inspect.getfullargspec -else: - - def _getfullargspec(target): - """A python2 version of getfullargspec. - - Args: - target: the target object to inspect. - Returns: - A FullArgSpec with empty kwonlyargs, kwonlydefaults and annotations. - """ - argspecs = getargspec(target) - fullargspecs = FullArgSpec( - args=argspecs.args, - varargs=argspecs.varargs, - varkw=argspecs.keywords, - defaults=argspecs.defaults, - kwonlyargs=[], - kwonlydefaults=None, - annotations={}) - return fullargspecs - - def getfullargspec(obj): """TFDecorator-aware replacement for `inspect.getfullargspec`. 
diff --git a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py index a8e69fda4f..3a48cf683c 100644 --- a/tensorflow/tools/api/lib/python_object_to_proto_visitor.py +++ b/tensorflow/tools/api/lib/python_object_to_proto_visitor.py @@ -47,9 +47,9 @@ def _SanitizedArgSpec(obj): string, a string representation of the argspec. """ output_string = '' - unsanitized_arg_spec = tf_inspect.getfullargspec(obj) + unsanitized_arg_spec = tf_inspect.getargspec(obj) - for clean_attr in ('args', 'varargs', 'varkw'): + for clean_attr in ('args', 'varargs', 'keywords'): output_string += '%s=%s, ' % (clean_attr, getattr(unsanitized_arg_spec, clean_attr)) -- GitLab From f4eccdda0ca2b06328363191975fa8364ba14728 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 27 Sep 2018 21:54:33 -0700 Subject: [PATCH 127/570] Run buildifier on workspace.bzl. PiperOrigin-RevId: 214886657 --- tensorflow/workspace.bzl | 380 +++++++++++++++++++-------------------- 1 file changed, 190 insertions(+), 190 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 6966783efd..70bade060e 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -57,39 +57,39 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name = "local_config_arm_compiler", - remote_config_repo = "../arm_compiler", build_file = clean_dep("//third_party/toolchains/cpus/arm:BUILD"), + remote_config_repo = "../arm_compiler", ) mkl_repository( name = "mkl_linux", + build_file = clean_dep("//third_party/mkl:mkl.BUILD"), + sha256 = "e2233534a9d15c387e22260997af4312a39e9f86f791768409be273b5453c4e6", + strip_prefix = "mklml_lnx_2019.0.20180710", urls = [ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_lnx_2019.0.20180710.tgz", 
"https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_lnx_2019.0.20180710.tgz", ], - sha256 = "e2233534a9d15c387e22260997af4312a39e9f86f791768409be273b5453c4e6", - strip_prefix = "mklml_lnx_2019.0.20180710", - build_file = clean_dep("//third_party/mkl:mkl.BUILD"), ) mkl_repository( name = "mkl_windows", + build_file = clean_dep("//third_party/mkl:mkl.BUILD"), + sha256 = "3fdcff17b018a0082491adf3ba143358265336a801646e46e0191ec8d58d24a2", + strip_prefix = "mklml_win_2019.0.20180710", urls = [ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_win_2019.0.20180710.zip", "https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_win_2019.0.20180710.zip", ], - sha256 = "3fdcff17b018a0082491adf3ba143358265336a801646e46e0191ec8d58d24a2", - strip_prefix = "mklml_win_2019.0.20180710", - build_file = clean_dep("//third_party/mkl:mkl.BUILD"), ) mkl_repository( name = "mkl_darwin", + build_file = clean_dep("//third_party/mkl:mkl.BUILD"), + sha256 = "411a30014a938eb83fb9f37b3dbe8e371b106fc1dd621fc23123cadc72737ce6", + strip_prefix = "mklml_mac_2019.0.20180710", urls = [ "https://mirror.bazel.build/github.com/intel/mkl-dnn/releases/download/v0.16/mklml_mac_2019.0.20180710.tgz", "https://github.com/intel/mkl-dnn/releases/download/v0.16/mklml_mac_2019.0.20180710.tgz", ], - sha256 = "411a30014a938eb83fb9f37b3dbe8e371b106fc1dd621fc23123cadc72737ce6", - strip_prefix = "mklml_mac_2019.0.20180710", - build_file = clean_dep("//third_party/mkl:mkl.BUILD"), ) if path_prefix: @@ -98,39 +98,40 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "mkl_dnn", + build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), + sha256 = "363cc9239eacf8e7917753c6d8c94f767e4cd049160d0654a61ef32d5e1b3049", + strip_prefix = "mkl-dnn-4e333787e0d66a1dca1218e99a891d493dbc8ef1", urls = [ "https://mirror.bazel.build/github.com/intel/mkl-dnn/archive/4e333787e0d66a1dca1218e99a891d493dbc8ef1.tar.gz", 
"https://github.com/intel/mkl-dnn/archive/4e333787e0d66a1dca1218e99a891d493dbc8ef1.tar.gz", ], - sha256 = "363cc9239eacf8e7917753c6d8c94f767e4cd049160d0654a61ef32d5e1b3049", - strip_prefix = "mkl-dnn-4e333787e0d66a1dca1218e99a891d493dbc8ef1", - build_file = clean_dep("//third_party/mkl_dnn:mkldnn.BUILD"), ) tf_http_archive( name = "com_google_absl", + build_file = clean_dep("//third_party:com_google_absl.BUILD"), + sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792", + strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80", urls = [ "https://mirror.bazel.build/github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz", "https://github.com/abseil/abseil-cpp/archive/e291c279e458761e77a69b09b129d3d1e81f1e80.tar.gz", ], - sha256 = "278a1af58b633be886fe81bf7061dca6b5fea99566850d1319fffdaa1a061792", - strip_prefix = "abseil-cpp-e291c279e458761e77a69b09b129d3d1e81f1e80", - build_file = clean_dep("//third_party:com_google_absl.BUILD"), ) tf_http_archive( name = "eigen_archive", + build_file = clean_dep("//third_party:eigen.BUILD"), + sha256 = "d956415d784fa4e42b6a2a45c32556d6aec9d0a3d8ef48baee2522ab762556a9", + strip_prefix = "eigen-eigen-fd6845384b86", urls = [ "https://mirror.bazel.build/bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz", "https://bitbucket.org/eigen/eigen/get/fd6845384b86.tar.gz", ], - sha256 = "d956415d784fa4e42b6a2a45c32556d6aec9d0a3d8ef48baee2522ab762556a9", - strip_prefix = "eigen-eigen-fd6845384b86", - build_file = clean_dep("//third_party:eigen.BUILD"), ) tf_http_archive( name = "arm_compiler", + build_file = clean_dep("//:arm_compiler.BUILD"), sha256 = "970285762565c7890c6c087d262b0a18286e7d0384f13a37786d8521773bc969", strip_prefix = "tools-0e906ebc527eab1cdbf7adabff5b474da9562e9f/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf", urls = [ @@ -139,216 +140,211 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # remove the whitelist entry in third_party/repo.bzl. 
# "https://github.com/raspberrypi/tools/archive/0e906ebc527eab1cdbf7adabff5b474da9562e9f.tar.gz", ], - build_file = clean_dep("//:arm_compiler.BUILD"), ) tf_http_archive( name = "libxsmm_archive", + build_file = clean_dep("//third_party:libxsmm.BUILD"), + sha256 = "cd8532021352b4a0290d209f7f9bfd7c2411e08286a893af3577a43457287bfa", + strip_prefix = "libxsmm-1.9", urls = [ "https://mirror.bazel.build/github.com/hfp/libxsmm/archive/1.9.tar.gz", "https://github.com/hfp/libxsmm/archive/1.9.tar.gz", ], - sha256 = "cd8532021352b4a0290d209f7f9bfd7c2411e08286a893af3577a43457287bfa", - strip_prefix = "libxsmm-1.9", - build_file = clean_dep("//third_party:libxsmm.BUILD"), ) tf_http_archive( name = "ortools_archive", + build_file = clean_dep("//third_party:ortools.BUILD"), + sha256 = "d025a95f78b5fc5eaa4da5f395f23d11c23cf7dbd5069f1f627f002de87b86b9", + strip_prefix = "or-tools-6.7.2/src", urls = [ "https://mirror.bazel.build/github.com/google/or-tools/archive/v6.7.2.tar.gz", "https://github.com/google/or-tools/archive/v6.7.2.tar.gz", ], - sha256 = "d025a95f78b5fc5eaa4da5f395f23d11c23cf7dbd5069f1f627f002de87b86b9", - strip_prefix = "or-tools-6.7.2/src", - build_file = clean_dep("//third_party:ortools.BUILD"), ) tf_http_archive( name = "com_googlesource_code_re2", + sha256 = "803c7811146edeef8f91064de37c6f19136ff01a2a8cdb3230e940b2fd9f07fe", + strip_prefix = "re2-2018-07-01", + system_build_file = clean_dep("//third_party/systemlibs:re2.BUILD"), urls = [ "https://mirror.bazel.build/github.com/google/re2/archive/2018-07-01.tar.gz", "https://github.com/google/re2/archive/2018-07-01.tar.gz", ], - sha256 = "803c7811146edeef8f91064de37c6f19136ff01a2a8cdb3230e940b2fd9f07fe", - strip_prefix = "re2-2018-07-01", - system_build_file = clean_dep("//third_party/systemlibs:re2.BUILD"), ) tf_http_archive( name = "com_github_googlecloudplatform_google_cloud_cpp", - urls = [ - 
"https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz", - "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz", - ], sha256 = "fdd3b3aecce60987e5525e55bf3a21d68a8695320bd5b980775af6507eec3944", strip_prefix = "google-cloud-cpp-14760a86c4ffab9943b476305c4fe927ad95db1c", system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"), system_link_files = { "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD", }, + urls = [ + "https://mirror.bazel.build/github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz", + "https://github.com/GoogleCloudPlatform/google-cloud-cpp/archive/14760a86c4ffab9943b476305c4fe927ad95db1c.tar.gz", + ], ) tf_http_archive( name = "com_github_googleapis_googleapis", + build_file = clean_dep("//third_party:googleapis.BUILD"), + sha256 = "824870d87a176f26bcef663e92051f532fac756d1a06b404055dc078425f4378", + strip_prefix = "googleapis-f81082ea1e2f85c43649bee26e0d9871d4b41cdb", + system_build_file = clean_dep("//third_party/systemlibs:googleapis.BUILD"), urls = [ "https://mirror.bazel.build/github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0d9871d4b41cdb.zip", "https://github.com/googleapis/googleapis/archive/f81082ea1e2f85c43649bee26e0d9871d4b41cdb.zip", ], - sha256 = "824870d87a176f26bcef663e92051f532fac756d1a06b404055dc078425f4378", - strip_prefix = "googleapis-f81082ea1e2f85c43649bee26e0d9871d4b41cdb", - build_file = clean_dep("//third_party:googleapis.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:googleapis.BUILD"), ) tf_http_archive( name = "gemmlowp", + sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658", + strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98", urls = [ 
"https://mirror.bazel.build/github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", "https://github.com/google/gemmlowp/archive/38ebac7b059e84692f53e5938f97a9943c120d98.zip", ], - sha256 = "b87faa7294dfcc5d678f22a59d2c01ca94ea1e2a3b488c38a95a67889ed0a658", - strip_prefix = "gemmlowp-38ebac7b059e84692f53e5938f97a9943c120d98", ) tf_http_archive( name = "farmhash_archive", + build_file = clean_dep("//third_party:farmhash.BUILD"), + sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", + strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", urls = [ "https://mirror.bazel.build/github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", "https://github.com/google/farmhash/archive/816a4ae622e964763ca0862d9dbd19324a1eaf45.tar.gz", ], - sha256 = "6560547c63e4af82b0f202cb710ceabb3f21347a4b996db565a411da5b17aba0", - strip_prefix = "farmhash-816a4ae622e964763ca0862d9dbd19324a1eaf45", - build_file = clean_dep("//third_party:farmhash.BUILD"), ) tf_http_archive( name = "highwayhash", + build_file = clean_dep("//third_party:highwayhash.BUILD"), + sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", + strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", urls = [ "http://mirror.bazel.build/github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", "https://github.com/google/highwayhash/archive/fd3d9af80465e4383162e4a7c5e2f406e82dd968.tar.gz", ], - sha256 = "9c3e0e87d581feeb0c18d814d98f170ff23e62967a2bd6855847f0b2fe598a37", - strip_prefix = "highwayhash-fd3d9af80465e4383162e4a7c5e2f406e82dd968", - build_file = clean_dep("//third_party:highwayhash.BUILD"), ) tf_http_archive( name = "nasm", + build_file = clean_dep("//third_party:nasm.BUILD"), + sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011", + strip_prefix = "nasm-2.13.03", + system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"), urls 
= [ "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.13.03.tar.bz2/sha512/d7a6b4cee8dfd603d8d4c976e5287b5cc542fa0b466ff989b743276a6e28114e64289bf02a7819eca63142a5278aa6eed57773007e5f589e15768e6456a8919d/nasm-2.13.03.tar.bz2", "http://www.nasm.us/pub/nasm/releasebuilds/2.13.03/nasm-2.13.03.tar.bz2", ], - sha256 = "63ec86477ad3f0f6292325fd89e1d93aea2e2fd490070863f17d48f7cd387011", - strip_prefix = "nasm-2.13.03", - build_file = clean_dep("//third_party:nasm.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:nasm.BUILD"), ) tf_http_archive( name = "jpeg", + build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), + sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b", + strip_prefix = "libjpeg-turbo-2.0.0", + system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"), urls = [ "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz", "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.0.tar.gz", ], - sha256 = "f892fff427ab3adffc289363eac26d197ce3ccacefe5f5822377348a8166069b", - strip_prefix = "libjpeg-turbo-2.0.0", - build_file = clean_dep("//third_party/jpeg:jpeg.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:jpeg.BUILD"), ) tf_http_archive( name = "png_archive", + build_file = clean_dep("//third_party:png.BUILD"), + patch_file = clean_dep("//third_party:png_fix_rpi.patch"), + sha256 = "e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef", + strip_prefix = "libpng-1.6.34", + system_build_file = clean_dep("//third_party/systemlibs:png.BUILD"), urls = [ "https://mirror.bazel.build/github.com/glennrp/libpng/archive/v1.6.34.tar.gz", "https://github.com/glennrp/libpng/archive/v1.6.34.tar.gz", ], - sha256 = "e45ce5f68b1d80e2cb9a2b601605b374bdf51e1798ef1c2c2bd62131dfcf9eef", - strip_prefix = "libpng-1.6.34", - build_file = 
clean_dep("//third_party:png.BUILD"), - patch_file = clean_dep("//third_party:png_fix_rpi.patch"), - system_build_file = clean_dep("//third_party/systemlibs:png.BUILD"), ) tf_http_archive( name = "org_sqlite", + build_file = clean_dep("//third_party:sqlite.BUILD"), + sha256 = "ad68c1216c3a474cf360c7581a4001e952515b3649342100f2d7ca7c8e313da6", + strip_prefix = "sqlite-amalgamation-3240000", + system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"), urls = [ "https://mirror.bazel.build/www.sqlite.org/2018/sqlite-amalgamation-3240000.zip", "https://www.sqlite.org/2018/sqlite-amalgamation-3240000.zip", ], - sha256 = "ad68c1216c3a474cf360c7581a4001e952515b3649342100f2d7ca7c8e313da6", - strip_prefix = "sqlite-amalgamation-3240000", - build_file = clean_dep("//third_party:sqlite.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:sqlite.BUILD"), ) tf_http_archive( name = "gif_archive", + build_file = clean_dep("//third_party:gif.BUILD"), + sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1", + strip_prefix = "giflib-5.1.4", + system_build_file = clean_dep("//third_party/systemlibs:gif.BUILD"), urls = [ "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz", "http://pilotfiber.dl.sourceforge.net/project/giflib/giflib-5.1.4.tar.gz", ], - sha256 = "34a7377ba834397db019e8eb122e551a49c98f49df75ec3fcc92b9a794a4f6d1", - strip_prefix = "giflib-5.1.4", - build_file = clean_dep("//third_party:gif.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:gif.BUILD"), ) tf_http_archive( name = "six_archive", + build_file = clean_dep("//third_party:six.BUILD"), + sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", + strip_prefix = "six-1.10.0", + system_build_file = clean_dep("//third_party/systemlibs:six.BUILD"), urls = [ "https://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", 
"https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz", ], - sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a", - strip_prefix = "six-1.10.0", - build_file = clean_dep("//third_party:six.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:six.BUILD"), ) tf_http_archive( name = "astor_archive", + build_file = clean_dep("//third_party:astor.BUILD"), + sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d", + strip_prefix = "astor-0.6.2", + system_build_file = clean_dep("//third_party/systemlibs:astor.BUILD"), urls = [ "https://mirror.bazel.build/pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz", "https://pypi.python.org/packages/d8/be/c4276b3199ec3feee2a88bc64810fbea8f26d961e0a4cd9c68387a9f35de/astor-0.6.2.tar.gz", ], - sha256 = "ff6d2e2962d834acb125cc4dcc80c54a8c17c253f4cc9d9c43b5102a560bb75d", - strip_prefix = "astor-0.6.2", - build_file = clean_dep("//third_party:astor.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:astor.BUILD"), ) tf_http_archive( name = "gast_archive", + build_file = clean_dep("//third_party:gast.BUILD"), + sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930", + strip_prefix = "gast-0.2.0", + system_build_file = clean_dep("//third_party/systemlibs:gast.BUILD"), urls = [ "https://mirror.bazel.build/pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz", "https://pypi.python.org/packages/5c/78/ff794fcae2ce8aa6323e789d1f8b3b7765f601e7702726f430e814822b96/gast-0.2.0.tar.gz", ], - sha256 = "7068908321ecd2774f145193c4b34a11305bd104b4551b09273dfd1d6a374930", - strip_prefix = "gast-0.2.0", - build_file = clean_dep("//third_party:gast.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:gast.BUILD"), ) tf_http_archive( name = "termcolor_archive", + build_file = clean_dep("//third_party:termcolor.BUILD"), + 
sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b", + strip_prefix = "termcolor-1.1.0", + system_build_file = clean_dep("//third_party/systemlibs:termcolor.BUILD"), urls = [ "https://mirror.bazel.build/pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz", "https://pypi.python.org/packages/8a/48/a76be51647d0eb9f10e2a4511bf3ffb8cc1e6b14e9e4fab46173aa79f981/termcolor-1.1.0.tar.gz", ], - sha256 = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b", - strip_prefix = "termcolor-1.1.0", - build_file = clean_dep("//third_party:termcolor.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:termcolor.BUILD"), ) tf_http_archive( name = "absl_py", - urls = [ - "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz", - "https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz", - ], sha256 = "95160f778a62c7a60ddeadc7bf2d83f85a23a27359814aca12cf949e896fa82c", strip_prefix = "abseil-py-pypi-v0.2.2", system_build_file = clean_dep("//third_party/systemlibs:absl_py.BUILD"), @@ -356,17 +352,21 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): "//third_party/systemlibs:absl_py.absl.flags.BUILD": "absl/flags/BUILD", "//third_party/systemlibs:absl_py.absl.testing.BUILD": "absl/testing/BUILD", }, + urls = [ + "https://mirror.bazel.build/github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz", + "https://github.com/abseil/abseil-py/archive/pypi-v0.2.2.tar.gz", + ], ) tf_http_archive( name = "org_python_pypi_backports_weakref", + build_file = clean_dep("//third_party:backports_weakref.BUILD"), + sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892", + strip_prefix = "backports.weakref-1.0rc1/src", urls = [ "https://mirror.bazel.build/pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz", 
"https://pypi.python.org/packages/bc/cc/3cdb0a02e7e96f6c70bd971bc8a90b8463fda83e264fa9c5c1c98ceabd81/backports.weakref-1.0rc1.tar.gz", ], - sha256 = "8813bf712a66b3d8b85dc289e1104ed220f1878cf981e2fe756dfaabe9a82892", - strip_prefix = "backports.weakref-1.0rc1/src", - build_file = clean_dep("//third_party:backports_weakref.BUILD"), ) filegroup_external( @@ -389,9 +389,9 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "protobuf_archive", - urls = PROTOBUF_URLS, sha256 = PROTOBUF_SHA256, strip_prefix = PROTOBUF_STRIP_PREFIX, + urls = PROTOBUF_URLS, ) # We need to import the protobuf library under the names com_google_protobuf @@ -399,222 +399,222 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): # Unfortunately there is no way to alias http_archives at the moment. tf_http_archive( name = "com_google_protobuf", - urls = PROTOBUF_URLS, sha256 = PROTOBUF_SHA256, strip_prefix = PROTOBUF_STRIP_PREFIX, + urls = PROTOBUF_URLS, ) tf_http_archive( name = "com_google_protobuf_cc", - urls = PROTOBUF_URLS, sha256 = PROTOBUF_SHA256, strip_prefix = PROTOBUF_STRIP_PREFIX, + urls = PROTOBUF_URLS, ) tf_http_archive( name = "nsync", + sha256 = "692f9b30e219f71a6371b98edd39cef3cbda35ac3abc4cd99ce19db430a5591a", + strip_prefix = "nsync-1.20.1", + system_build_file = clean_dep("//third_party/systemlibs:nsync.BUILD"), urls = [ "https://mirror.bazel.build/github.com/google/nsync/archive/1.20.1.tar.gz", "https://github.com/google/nsync/archive/1.20.1.tar.gz", ], - sha256 = "692f9b30e219f71a6371b98edd39cef3cbda35ac3abc4cd99ce19db430a5591a", - strip_prefix = "nsync-1.20.1", - system_build_file = clean_dep("//third_party/systemlibs:nsync.BUILD"), ) tf_http_archive( name = "com_google_googletest", + sha256 = "353ab86e35cea1cd386115279cf4b16695bbf21b897bfbf2721cf4cb5f64ade8", + strip_prefix = "googletest-997d343dd680e541ef96ce71ee54a91daf2577a0", urls = [ 
"https://mirror.bazel.build/github.com/google/googletest/archive/997d343dd680e541ef96ce71ee54a91daf2577a0.zip", "https://github.com/google/googletest/archive/997d343dd680e541ef96ce71ee54a91daf2577a0.zip", ], - sha256 = "353ab86e35cea1cd386115279cf4b16695bbf21b897bfbf2721cf4cb5f64ade8", - strip_prefix = "googletest-997d343dd680e541ef96ce71ee54a91daf2577a0", ) tf_http_archive( name = "com_github_gflags_gflags", + sha256 = "ae27cdbcd6a2f935baa78e4f21f675649271634c092b1be01469440495609d0e", + strip_prefix = "gflags-2.2.1", urls = [ "https://mirror.bazel.build/github.com/gflags/gflags/archive/v2.2.1.tar.gz", "https://github.com/gflags/gflags/archive/v2.2.1.tar.gz", ], - sha256 = "ae27cdbcd6a2f935baa78e4f21f675649271634c092b1be01469440495609d0e", - strip_prefix = "gflags-2.2.1", ) tf_http_archive( name = "pcre", + build_file = clean_dep("//third_party:pcre.BUILD"), sha256 = "69acbc2fbdefb955d42a4c606dfde800c2885711d2979e356c0636efde9ec3b5", + strip_prefix = "pcre-8.42", + system_build_file = clean_dep("//third_party/systemlibs:pcre.BUILD"), urls = [ "https://mirror.bazel.build/ftp.exim.org/pub/pcre/pcre-8.42.tar.gz", "http://ftp.exim.org/pub/pcre/pcre-8.42.tar.gz", ], - strip_prefix = "pcre-8.42", - build_file = clean_dep("//third_party:pcre.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:pcre.BUILD"), ) tf_http_archive( name = "swig", + build_file = clean_dep("//third_party:swig.BUILD"), sha256 = "58a475dbbd4a4d7075e5fe86d4e54c9edde39847cdb96a3053d87cb64a23a453", + strip_prefix = "swig-3.0.8", + system_build_file = clean_dep("//third_party/systemlibs:swig.BUILD"), urls = [ "https://mirror.bazel.build/ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", "http://ufpr.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", "http://pilotfiber.dl.sourceforge.net/project/swig/swig/swig-3.0.8/swig-3.0.8.tar.gz", ], - strip_prefix = "swig-3.0.8", - build_file = clean_dep("//third_party:swig.BUILD"), - system_build_file = 
clean_dep("//third_party/systemlibs:swig.BUILD"), ) tf_http_archive( name = "curl", + build_file = clean_dep("//third_party:curl.BUILD"), sha256 = "e9c37986337743f37fd14fe8737f246e97aec94b39d1b71e8a5973f72a9fc4f5", + strip_prefix = "curl-7.60.0", + system_build_file = clean_dep("//third_party/systemlibs:curl.BUILD"), urls = [ "https://mirror.bazel.build/curl.haxx.se/download/curl-7.60.0.tar.gz", "https://curl.haxx.se/download/curl-7.60.0.tar.gz", ], - strip_prefix = "curl-7.60.0", - build_file = clean_dep("//third_party:curl.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:curl.BUILD"), ) tf_http_archive( name = "grpc", + sha256 = "50db9cf2221354485eb7c3bd55a4c27190caef7048a2a1a15fbe60a498f98b44", + strip_prefix = "grpc-1.13.0", + system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"), urls = [ "https://mirror.bazel.build/github.com/grpc/grpc/archive/v1.13.0.tar.gz", "https://github.com/grpc/grpc/archive/v1.13.0.tar.gz", ], - sha256 = "50db9cf2221354485eb7c3bd55a4c27190caef7048a2a1a15fbe60a498f98b44", - strip_prefix = "grpc-1.13.0", - system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"), ) tf_http_archive( name = "linenoise", + build_file = clean_dep("//third_party:linenoise.BUILD"), sha256 = "7f51f45887a3d31b4ce4fa5965210a5e64637ceac12720cfce7954d6a2e812f7", + strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", urls = [ "https://mirror.bazel.build/github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", "https://github.com/antirez/linenoise/archive/c894b9e59f02203dbe4e2be657572cf88c4230c3.tar.gz", ], - strip_prefix = "linenoise-c894b9e59f02203dbe4e2be657572cf88c4230c3", - build_file = clean_dep("//third_party:linenoise.BUILD"), ) # TODO(phawkins): currently, this rule uses an unofficial LLVM mirror. # Switch to an official source of snapshots if/when possible. 
tf_http_archive( name = "llvm", + build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"), + sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8", + strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271", urls = [ "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz", "https://github.com/llvm-mirror/llvm/archive/d3429e96fe1e45b1dc0106463832523f37faf271.tar.gz", ], - sha256 = "a4f8bfe7e3e69069934a87e612a1d4d3b8b6af13e0f1213a42a6046e1bcd50d8", - strip_prefix = "llvm-d3429e96fe1e45b1dc0106463832523f37faf271", - build_file = clean_dep("//third_party/llvm:llvm.autogenerated.BUILD"), ) tf_http_archive( name = "lmdb", + build_file = clean_dep("//third_party:lmdb.BUILD"), + sha256 = "f3927859882eb608868c8c31586bb7eb84562a40a6bf5cc3e13b6b564641ea28", + strip_prefix = "lmdb-LMDB_0.9.22/libraries/liblmdb", + system_build_file = clean_dep("//third_party/systemlibs:lmdb.BUILD"), urls = [ "https://mirror.bazel.build/github.com/LMDB/lmdb/archive/LMDB_0.9.22.tar.gz", "https://github.com/LMDB/lmdb/archive/LMDB_0.9.22.tar.gz", ], - sha256 = "f3927859882eb608868c8c31586bb7eb84562a40a6bf5cc3e13b6b564641ea28", - strip_prefix = "lmdb-LMDB_0.9.22/libraries/liblmdb", - build_file = clean_dep("//third_party:lmdb.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:lmdb.BUILD"), ) tf_http_archive( name = "jsoncpp_git", + build_file = clean_dep("//third_party:jsoncpp.BUILD"), + sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6", + strip_prefix = "jsoncpp-1.8.4", + system_build_file = clean_dep("//third_party/systemlibs:jsoncpp.BUILD"), urls = [ "https://mirror.bazel.build/github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz", "https://github.com/open-source-parsers/jsoncpp/archive/1.8.4.tar.gz", ], - sha256 = "c49deac9e0933bcb7044f08516861a2d560988540b23de2ac1ad443b219afdb6", - strip_prefix = "jsoncpp-1.8.4", - build_file = 
clean_dep("//third_party:jsoncpp.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:jsoncpp.BUILD"), ) tf_http_archive( name = "boringssl", + sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45", + strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514", + system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"), urls = [ "https://mirror.bazel.build/github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", "https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", ], - sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45", - strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514", - system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"), ) tf_http_archive( name = "zlib_archive", + build_file = clean_dep("//third_party:zlib.BUILD"), + sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", + strip_prefix = "zlib-1.2.11", + system_build_file = clean_dep("//third_party/systemlibs:zlib.BUILD"), urls = [ "https://mirror.bazel.build/zlib.net/zlib-1.2.11.tar.gz", "https://zlib.net/zlib-1.2.11.tar.gz", ], - sha256 = "c3e5e9fdd5004dcb542feda5ee4f0ff0744628baf8ed2dd5d66f8ca1197cb1a1", - strip_prefix = "zlib-1.2.11", - build_file = clean_dep("//third_party:zlib.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:zlib.BUILD"), ) tf_http_archive( name = "fft2d", + build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"), + sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296", urls = [ "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz", "http://www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz", ], - sha256 = "52bb637c70b971958ec79c9c8752b1df5ff0218a4db4510e60826e0cb79b5296", - build_file = clean_dep("//third_party/fft2d:fft2d.BUILD"), ) tf_http_archive( name = "snappy", + build_file = clean_dep("//third_party:snappy.BUILD"), + 
sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4", + strip_prefix = "snappy-1.1.7", + system_build_file = clean_dep("//third_party/systemlibs:snappy.BUILD"), urls = [ "https://mirror.bazel.build/github.com/google/snappy/archive/1.1.7.tar.gz", "https://github.com/google/snappy/archive/1.1.7.tar.gz", ], - sha256 = "3dfa02e873ff51a11ee02b9ca391807f0c8ea0529a4924afa645fbf97163f9d4", - strip_prefix = "snappy-1.1.7", - build_file = clean_dep("//third_party:snappy.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:snappy.BUILD"), ) tf_http_archive( name = "nccl_archive", + build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"), + sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", + strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", urls = [ "https://mirror.bazel.build/github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", "https://github.com/nvidia/nccl/archive/03d856977ecbaac87e598c0c4bafca96761b9ac7.tar.gz", ], - sha256 = "2ca86fb6179ecbff789cc67c836139c1bbc0324ed8c04643405a30bf26325176", - strip_prefix = "nccl-03d856977ecbaac87e598c0c4bafca96761b9ac7", - build_file = clean_dep("//third_party:nccl/nccl_archive.BUILD"), ) tf_http_archive( name = "kafka", + build_file = clean_dep("//third_party:kafka/BUILD"), + patch_file = clean_dep("//third_party/kafka:config.patch"), + sha256 = "cc6ebbcd0a826eec1b8ce1f625ffe71b53ef3290f8192b6cae38412a958f4fd3", + strip_prefix = "librdkafka-0.11.5", urls = [ "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz", "https://github.com/edenhill/librdkafka/archive/v0.11.5.tar.gz", ], - sha256 = "cc6ebbcd0a826eec1b8ce1f625ffe71b53ef3290f8192b6cae38412a958f4fd3", - strip_prefix = "librdkafka-0.11.5", - build_file = clean_dep("//third_party:kafka/BUILD"), - patch_file = clean_dep("//third_party/kafka:config.patch"), ) tf_http_archive( name = "aws", + build_file = clean_dep("//third_party:aws.BUILD"), + 
sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", + strip_prefix = "aws-sdk-cpp-1.3.15", urls = [ "https://mirror.bazel.build/github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", "https://github.com/aws/aws-sdk-cpp/archive/1.3.15.tar.gz", ], - sha256 = "b888d8ce5fc10254c3dd6c9020c7764dd53cf39cf011249d0b4deda895de1b7c", - strip_prefix = "aws-sdk-cpp-1.3.15", - build_file = clean_dep("//third_party:aws.BUILD"), ) java_import_external( @@ -644,14 +644,14 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "jemalloc", + build_file = clean_dep("//third_party:jemalloc.BUILD"), + sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", + strip_prefix = "jemalloc-4.4.0", + system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"), urls = [ "https://mirror.bazel.build/github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", "https://github.com/jemalloc/jemalloc/archive/4.4.0.tar.gz", ], - sha256 = "3c8f25c02e806c3ce0ab5fb7da1817f89fc9732709024e2a81b6b82f7cc792a8", - strip_prefix = "jemalloc-4.4.0", - build_file = clean_dep("//third_party:jemalloc.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:jemalloc.BUILD"), ) java_import_external( @@ -700,196 +700,196 @@ def tf_workspace(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_pprof", + build_file = clean_dep("//third_party:pprof.BUILD"), + sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", + strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", urls = [ "https://mirror.bazel.build/github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz", "https://github.com/google/pprof/archive/c0fb62ec88c411cc91194465e54db2632845b650.tar.gz", ], - sha256 = "e0928ca4aa10ea1e0551e2d7ce4d1d7ea2d84b2abbdef082b0da84268791d0c4", - strip_prefix = "pprof-c0fb62ec88c411cc91194465e54db2632845b650", - build_file = clean_dep("//third_party:pprof.BUILD"), ) 
tf_http_archive( name = "cub_archive", + build_file = clean_dep("//third_party:cub.BUILD"), + sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", + strip_prefix = "cub-1.8.0", urls = [ "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip", "https://github.com/NVlabs/cub/archive/1.8.0.zip", ], - sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", - strip_prefix = "cub-1.8.0", - build_file = clean_dep("//third_party:cub.BUILD"), ) tf_http_archive( name = "cython", + build_file = clean_dep("//third_party:cython.BUILD"), + delete = ["BUILD.bazel"], sha256 = "bccc9aa050ea02595b2440188813b936eaf345e85fb9692790cecfe095cf91aa", + strip_prefix = "cython-0.28.4", + system_build_file = clean_dep("//third_party/systemlibs:cython.BUILD"), urls = [ "https://mirror.bazel.build/github.com/cython/cython/archive/0.28.4.tar.gz", "https://github.com/cython/cython/archive/0.28.4.tar.gz", ], - strip_prefix = "cython-0.28.4", - build_file = clean_dep("//third_party:cython.BUILD"), - delete = ["BUILD.bazel"], - system_build_file = clean_dep("//third_party/systemlibs:cython.BUILD"), ) tf_http_archive( name = "bazel_toolchains", + sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b", + strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz", "https://github.com/bazelbuild/bazel-toolchains/archive/37acf1841ab1475c98a152cb9e446460c8ae29e1.tar.gz", ], - strip_prefix = "bazel-toolchains-37acf1841ab1475c98a152cb9e446460c8ae29e1", - sha256 = "3b604699685c5c65dd3f6f17425570a4b2f00ddba2f750db15acc72e55bb098b", ) tf_http_archive( name = "arm_neon_2_x86_sse", + build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), sha256 = "c8d90aa4357f8079d427e87a6f4c493da1fa4140aee926c05902d7ec1533d9a5", strip_prefix = 
"ARM_NEON_2_x86_SSE-0f77d9d182265259b135dad949230ecbf1a2633d", urls = [ "https://mirror.bazel.build/github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", "https://github.com/intel/ARM_NEON_2_x86_SSE/archive/0f77d9d182265259b135dad949230ecbf1a2633d.tar.gz", ], - build_file = clean_dep("//third_party:arm_neon_2_x86_sse.BUILD"), ) tf_http_archive( name = "double_conversion", + build_file = clean_dep("//third_party:double_conversion.BUILD"), + sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de", + strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8", + system_build_file = clean_dep("//third_party/systemlibs:double_conversion.BUILD"), urls = [ "https://mirror.bazel.build/github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip", "https://github.com/google/double-conversion/archive/3992066a95b823efc8ccc1baf82a1cfc73f6e9b8.zip", ], - sha256 = "2f7fbffac0d98d201ad0586f686034371a6d152ca67508ab611adc2386ad30de", - strip_prefix = "double-conversion-3992066a95b823efc8ccc1baf82a1cfc73f6e9b8", - build_file = clean_dep("//third_party:double_conversion.BUILD"), - system_build_file = clean_dep("//third_party/systemlibs:double_conversion.BUILD"), ) tf_http_archive( name = "tflite_mobilenet", + build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"), sha256 = "23f814d1c076bdf03715dfb6cab3713aa4fbdf040fd5448c43196bd2e97a4c1b", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip", ], - build_file = clean_dep("//third_party:tflite_mobilenet.BUILD"), ) tf_http_archive( name = "tflite_mobilenet_ssd", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), sha256 = "767057f2837a46d97882734b03428e8dd640b93236052b312b2f0e45613c1cf0", urls = [ 
"https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_ssd_tflite_v1.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_ssd_tflite_v1.zip", ], - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), ) tf_http_archive( name = "tflite_mobilenet_ssd_quant", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), sha256 = "a809cd290b4d6a2e8a9d5dad076e0bd695b8091974e0eed1052b480b2f21b6dc", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_0.75_quant_2018_06_29.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_0.75_quant_2018_06_29.zip", ], - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), ) tf_http_archive( name = "tflite_mobilenet_ssd_quant_protobuf", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), sha256 = "09280972c5777f1aa775ef67cb4ac5d5ed21970acd8535aeca62450ef14f0d79", + strip_prefix = "ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz", "http://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18.tar.gz", ], - strip_prefix = "ssd_mobilenet_v1_quantized_300x300_coco14_sync_2018_07_18", - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), ) tf_http_archive( name = "tflite_conv_actions_frozen", + build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), sha256 = "d947b38cba389b5e2d0bfc3ea6cc49c784e187b41a071387b3742d1acac7691e", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/conv_actions_tflite.zip", 
"https://storage.googleapis.com/download.tensorflow.org/models/tflite/conv_actions_tflite.zip", ], - build_file = str(Label("//third_party:tflite_mobilenet.BUILD")), ) tf_http_archive( name = "tflite_smartreply", + build_file = clean_dep("//third_party:tflite_smartreply.BUILD"), sha256 = "8980151b85a87a9c1a3bb1ed4748119e4a85abd3cb5744d83da4d4bd0fbeef7c", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip", "https://storage.googleapis.com/download.tensorflow.org/models/tflite/smartreply_1.0_2017_11_01.zip", ], - build_file = clean_dep("//third_party:tflite_smartreply.BUILD"), ) tf_http_archive( name = "tflite_ovic_testdata", + build_file = clean_dep("//third_party:tflite_ovic_testdata.BUILD"), sha256 = "a9a705d8d519220178e2e65d383fdb21da37fdb31d1e909b0a1acdac46479e9c", + strip_prefix = "ovic", urls = [ "https://mirror.bazel.build/storage.googleapis.com/download.tensorflow.org/data/ovic.zip", "https://storage.googleapis.com/download.tensorflow.org/data/ovic.zip", ], - build_file = clean_dep("//third_party:tflite_ovic_testdata.BUILD"), - strip_prefix = "ovic", ) tf_http_archive( name = "build_bazel_rules_android", sha256 = "cd06d15dd8bb59926e4d65f9003bfc20f9da4b2519985c27e190cddc8b7a7806", + strip_prefix = "rules_android-0.1.1", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/rules_android/archive/v0.1.1.zip", "https://github.com/bazelbuild/rules_android/archive/v0.1.1.zip", ], - strip_prefix = "rules_android-0.1.1", ) tf_http_archive( name = "tbb", + build_file = clean_dep("//third_party/ngraph:tbb.BUILD"), + sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13", + strip_prefix = "tbb-tbb_2018", urls = [ "https://mirror.bazel.build/github.com/01org/tbb/archive/tbb_2018.zip", "https://github.com/01org/tbb/archive/tbb_2018.zip", ], - sha256 = "724686f90bcda78f13b76f297d964008737ccd6399328143c1c0093e73ae6a13", - strip_prefix = "tbb-tbb_2018", - build_file = 
clean_dep("//third_party/ngraph:tbb.BUILD"), ) tf_http_archive( name = "ngraph", + build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), + sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62", + strip_prefix = "ngraph-0.8.1", urls = [ "https://mirror.bazel.build/github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz", "https://github.com/NervanaSystems/ngraph/archive/v0.8.1.tar.gz", ], - sha256 = "bf9dcc88e5c66021e3aac80491a231711211540d613bf9b6bd28db3f5bb86b62", - strip_prefix = "ngraph-0.8.1", - build_file = clean_dep("//third_party/ngraph:ngraph.BUILD"), ) tf_http_archive( name = "nlohmann_json_lib", + build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), + sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47", + strip_prefix = "json-3.1.1", urls = [ "https://mirror.bazel.build/github.com/nlohmann/json/archive/v3.1.1.tar.gz", "https://github.com/nlohmann/json/archive/v3.1.1.tar.gz", ], - sha256 = "9f3549824af3ca7e9707a2503959886362801fb4926b869789d6929098a79e47", - strip_prefix = "json-3.1.1", - build_file = clean_dep("//third_party/ngraph:nlohmann_json.BUILD"), ) tf_http_archive( name = "ngraph_tf", + build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), + sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee", + strip_prefix = "ngraph-tf-0.6.1", urls = [ "https://mirror.bazel.build/github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz", "https://github.com/NervanaSystems/ngraph-tf/archive/v0.6.1.tar.gz", ], - sha256 = "402f84c748c113780a60f35f39aab118435285543aee4900d712b76fbf8a21ee", - strip_prefix = "ngraph-tf-0.6.1", - build_file = clean_dep("//third_party/ngraph:ngraph_tf.BUILD"), ) ############################################################################## -- GitLab From d56c298f1ef14b5a738e1e0b7bbc66fcd736be3e Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 27 Sep 2018 21:57:48 -0700 Subject: [PATCH 128/570] Remove AWS, GCP, Kafka and HDFS 
options from configure. Make them default on for supported operating systems. PiperOrigin-RevId: 214886845 --- configure.py | 12 --- tensorflow/BUILD | 96 ------------------- tensorflow/contrib/BUILD | 50 +++------- .../core/platform/default/build_config.bzl | 45 ++++----- tensorflow/tools/lib_package/BUILD | 38 ++++---- tensorflow/tools/pip_package/BUILD | 27 ++---- 6 files changed, 55 insertions(+), 213 deletions(-) diff --git a/configure.py b/configure.py index f71caa1994..55fce8b93b 100644 --- a/configure.py +++ b/configure.py @@ -1488,11 +1488,7 @@ def main(): setup_python(environ_cp) if is_windows(): - environ_cp['TF_NEED_AWS'] = '0' - environ_cp['TF_NEED_GCP'] = '0' - environ_cp['TF_NEED_HDFS'] = '0' environ_cp['TF_NEED_JEMALLOC'] = '0' - environ_cp['TF_NEED_KAFKA'] = '0' environ_cp['TF_NEED_OPENCL_SYCL'] = '0' environ_cp['TF_NEED_COMPUTECPP'] = '0' environ_cp['TF_NEED_OPENCL'] = '0' @@ -1518,14 +1514,6 @@ def main(): set_build_var(environ_cp, 'TF_NEED_JEMALLOC', 'jemalloc as malloc', 'with_jemalloc', True) - set_build_var(environ_cp, 'TF_NEED_GCP', 'Google Cloud Platform', - 'with_gcp_support', True, 'gcp') - set_build_var(environ_cp, 'TF_NEED_HDFS', 'Hadoop File System', - 'with_hdfs_support', True, 'hdfs') - set_build_var(environ_cp, 'TF_NEED_AWS', 'Amazon AWS Platform', - 'with_aws_support', True, 'aws') - set_build_var(environ_cp, 'TF_NEED_KAFKA', 'Apache Kafka Platform', - 'with_kafka_support', True, 'kafka') set_build_var(environ_cp, 'TF_ENABLE_XLA', 'XLA JIT', 'with_xla_support', False, 'xla') diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 3610eea42a..5f73da68a2 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -224,60 +224,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "with_gcp_support", - define_values = {"with_gcp_support": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support", - define_values = {"with_hdfs_support": "true"}, - visibility = 
["//visibility:public"], -) - -config_setting( - name = "with_aws_support", - define_values = {"with_aws_support": "true"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_kafka_support", - define_values = {"with_kafka_support": "true"}, - visibility = ["//visibility:public"], -) - -# Crosses between platforms and file system libraries not supported on those -# platforms due to limitations in nested select() statements. -config_setting( - name = "with_gcp_support_windows_override", - define_values = {"with_gcp_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_windows_override", - define_values = {"with_hdfs_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_windows_override", - define_values = {"with_aws_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_kafka_support_windows_override", - define_values = {"with_kafka_support": "true"}, - values = {"cpu": "x64_windows"}, - visibility = ["//visibility:public"], -) - config_setting( name = "with_cuda_support_windows_override", define_values = {"using_cuda_nvcc": "true"}, @@ -285,48 +231,6 @@ config_setting( visibility = ["//visibility:public"], ) -config_setting( - name = "with_gcp_support_android_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_android_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_android_override", - define_values = {"with_aws_support": "true"}, - values = {"crosstool_top": 
"//external:android/crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_gcp_support_ios_override", - define_values = {"with_gcp_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_hdfs_support_ios_override", - define_values = {"with_hdfs_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - -config_setting( - name = "with_aws_support_ios_override", - define_values = {"with_aws_support": "true"}, - values = {"crosstool_top": "//tools/osx/crosstool:crosstool"}, - visibility = ["//visibility:public"], -) - config_setting( name = "with_xla_support", define_values = {"with_xla_support": "true"}, diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index ae5ca32bcf..1a9ae8ac3a 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -112,26 +112,14 @@ py_library( "//tensorflow/python:util", "//tensorflow/python/estimator:estimator_py", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ - "//tensorflow/contrib/kafka", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ - "//tensorflow/contrib/kinesis", - ], - "//conditions:default": [], - }) + if_not_windows_cuda([ - "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols - ]) + if_not_windows([ - ]) + select({ "//tensorflow:linux_s390x": [], "//tensorflow:windows": [], "//conditions:default": [ "//tensorflow/contrib/bigtable", "//tensorflow/contrib/cloud:cloud_py", + "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols + "//tensorflow/contrib/kafka", + 
"//tensorflow/contrib/kinesis", "//tensorflow/contrib/tensorrt:init_py", "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", ], @@ -159,20 +147,14 @@ cc_library( ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_cuda([ "//tensorflow/contrib/nccl:nccl_kernels", ]) + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_kernels", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ "//tensorflow/contrib/kinesis:dataset_kernels", + "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", ], - "//conditions:default": [], - }) + if_not_windows([ - "//tensorflow/contrib/tensorrt:trt_engine_op_kernel", - ]), + }), ) cc_library( @@ -198,18 +180,12 @@ cc_library( "//tensorflow/contrib/text:all_ops", "//tensorflow/contrib/tpu:all_ops", ] + select({ - "//tensorflow:with_kafka_support_windows_override": [], - "//tensorflow:with_kafka_support": [ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ "//tensorflow/contrib/kafka:dataset_ops_op_lib", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support": [ "//tensorflow/contrib/kinesis:dataset_ops_op_lib", + "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", ], - "//conditions:default": [], - }) + if_not_windows([ - "//tensorflow/contrib/tensorrt:trt_engine_op_op_lib", - ]), + }), ) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index bb841aeab7..3b14757945 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -641,54 +641,41 @@ def tf_additional_lib_deps(): def tf_additional_core_deps(): 
return select({ - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], - "//tensorflow:with_gcp_support": [ + "//tensorflow:android": [], + "//tensorflow:windows": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//conditions:default": [ "//tensorflow/core/platform/cloud:gcs_file_system", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_hdfs_support_windows_override": [], - "//tensorflow:with_hdfs_support_android_override": [], - "//tensorflow:with_hdfs_support_ios_override": [], - "//tensorflow:with_hdfs_support": [ - "//tensorflow/core/platform/hadoop:hadoop_file_system", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_aws_support_windows_override": [], - "//tensorflow:with_aws_support_android_override": [], - "//tensorflow:with_aws_support_ios_override": [], - "//tensorflow:with_aws_support": [ "//tensorflow/core/platform/s3:s3_file_system", + "//tensorflow/core/platform/hadoop:hadoop_file_system", ], - "//conditions:default": [], }) # TODO(jart, jhseu): Delete when GCP is default on. def tf_additional_cloud_op_deps(): return select({ - "//tensorflow:with_gcp_support_windows_override": [], - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], - "//tensorflow:with_gcp_support": [ + "//tensorflow:android": [], + "//tensorflow:windows": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//conditions:default": [ "//tensorflow/contrib/cloud:bigquery_reader_ops_op_lib", "//tensorflow/contrib/cloud:gcs_config_ops_op_lib", ], - "//conditions:default": [], }) # TODO(jart, jhseu): Delete when GCP is default on. 
def tf_additional_cloud_kernel_deps(): return select({ - "//tensorflow:with_gcp_support_windows_override": [], - "//tensorflow:with_gcp_support_android_override": [], - "//tensorflow:with_gcp_support_ios_override": [], - "//tensorflow:with_gcp_support": [ + "//tensorflow:android": [], + "//tensorflow:windows": [], + "//tensorflow:ios": [], + "//tensorflow:linux_s390x": [], + "//conditions:default": [ "//tensorflow/contrib/cloud/kernels:bigquery_reader_ops", "//tensorflow/contrib/cloud/kernels:gcs_config_ops", ], - "//conditions:default": [], }) def tf_lib_proto_parsing_deps(): diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index 095ac1f4cc..b9f4902639 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -137,16 +137,6 @@ genrule( "@snappy//:COPYING", "@zlib_archive//:zlib.h", ] + select({ - "//tensorflow:with_aws_support": [ - "@aws//:LICENSE", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_gcp_support": [ - "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", - ], - "//conditions:default": [], - }) + select({ "//tensorflow:with_jemalloc_linux_x86_64": [ "@jemalloc//:COPYING", ], @@ -171,7 +161,14 @@ genrule( "@grpc//third_party/nanopb:LICENSE.txt", "@grpc//third_party/address_sorting:LICENSE", ], - ), + ) + select({ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ + "@aws//:LICENSE", + "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", + ], + }), outs = ["include/tensorflow/c/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", tools = [":concat_licenses.sh"], @@ -205,16 +202,6 @@ genrule( "@snappy//:COPYING", "@zlib_archive//:zlib.h", ] + select({ - "//tensorflow:with_aws_support": [ - "@aws//:LICENSE", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_gcp_support": [ - "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", - ], - "//conditions:default": [], - 
}) + select({ "//tensorflow:with_jemalloc_linux_x86_64": [ "@jemalloc//:COPYING", ], @@ -232,7 +219,14 @@ genrule( ]) + if_mkl([ "//third_party/mkl:LICENSE", "//third_party/mkl_dnn:LICENSE", - ]), + ]) + select({ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ + "@aws//:LICENSE", + "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", + ], + }), outs = ["include/tensorflow/jni/LICENSE"], cmd = "$(location :concat_licenses.sh) $(SRCS) >$@", tools = [":concat_licenses.sh"], diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index cce60ccea0..f1de22300b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -169,17 +169,6 @@ filegroup( "@zlib_archive//:zlib.h", "@org_python_pypi_backports_weakref//:LICENSE", ] + select({ - "//tensorflow:with_aws_support": [ - "@aws//:LICENSE", - ], - "//conditions:default": [], - }) + select({ - "//tensorflow:with_gcp_support": [ - "@com_github_googleapis_googleapis//:LICENSE", - "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", - ], - "//conditions:default": [], - }) + select({ "//tensorflow:with_jemalloc_linux_x86_64": [ "@jemalloc//:COPYING", ], @@ -187,11 +176,6 @@ filegroup( "@jemalloc//:COPYING", ], "//conditions:default": [], - }) + select({ - "//tensorflow:with_kafka_support": [ - "@kafka//:LICENSE", - ], - "//conditions:default": [], }) + select({ "//tensorflow/core/kernels:xsmm": [ "@libxsmm_archive//:LICENSE.md", @@ -215,7 +199,16 @@ filegroup( "@ngraph_tf//:LICENSE", "@nlohmann_json_lib//:LICENSE.MIT", "@tbb//:LICENSE", - ]) + tf_additional_license_deps(), + ]) + tf_additional_license_deps() + select({ + "//tensorflow:linux_s390x": [], + "//tensorflow:windows": [], + "//conditions:default": [ + "@aws//:LICENSE", + "@com_github_googleapis_googleapis//:LICENSE", + "@com_github_googlecloudplatform_google_cloud_cpp//:LICENSE", + "@kafka//:LICENSE", + ], + }), ) sh_binary( -- GitLab From 
6ebe9baae06c06d0a70a424a55c78f5af07b49f7 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Thu, 27 Sep 2018 22:57:39 -0700 Subject: [PATCH 129/570] Fix error that occurs when attempting to use TensorFlow optimizers with Keras and DistributionStrategy PiperOrigin-RevId: 214890580 --- .../contrib/distribute/python/combinations.py | 3 + .../contrib/distribute/python/keras_test.py | 121 ++++--- tensorflow/python/keras/engine/training.py | 3 +- .../keras/engine/training_distributed.py | 341 +++++++++--------- 4 files changed, 240 insertions(+), 228 deletions(-) diff --git a/tensorflow/contrib/distribute/python/combinations.py b/tensorflow/contrib/distribute/python/combinations.py index 244d1fcec8..82ca041cc2 100644 --- a/tensorflow/contrib/distribute/python/combinations.py +++ b/tensorflow/contrib/distribute/python/combinations.py @@ -59,6 +59,7 @@ from tensorflow.python.training import adagrad from tensorflow.python.training import adam from tensorflow.python.training import distribution_strategy_context from tensorflow.python.training import gradient_descent +from tensorflow.python.training import rmsprop from tensorflow.python.util import tf_inspect @@ -354,6 +355,8 @@ gradient_descent_optimizer_v1_fn = NamedObject( "GradientDescentV1", lambda: gradient_descent.GradientDescentOptimizer(0.2)) adagrad_optimizer_v1_fn = NamedObject( "AdagradV1", lambda: adagrad.AdagradOptimizer(0.001)) +rmsprop_optimizer_v1_fn = NamedObject( + "RmsPropV1", lambda: rmsprop.RMSPropOptimizer(0.001)) optimizers_v1 = [adam_optimizer_v1_fn, gradient_descent_optimizer_v1_fn, adagrad_optimizer_v1_fn] diff --git a/tensorflow/contrib/distribute/python/keras_test.py b/tensorflow/contrib/distribute/python/keras_test.py index a0b8bde132..3aab2c521f 100644 --- a/tensorflow/contrib/distribute/python/keras_test.py +++ b/tensorflow/contrib/distribute/python/keras_test.py @@ -173,13 +173,42 @@ def batch_wrapper(dataset, batch_size, distribution): return dataset.batch(batch_size) -def 
all_combinations(): +def get_model(): + x = keras.layers.Input(shape=(3,), name='input') + y = keras.layers.Dense(4, name='dense')(x) + model = keras.Model(x, y) + return model + + +def get_dataset(distribution): + inputs = np.zeros((10, 3), dtype=np.float32) + targets = np.zeros((10, 4), dtype=np.float32) + dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = dataset.repeat(100) + dataset = batch_wrapper(dataset, 10, distribution) + return dataset + + +strategies = [combinations.default_strategy, + combinations.one_device_strategy, + combinations.mirrored_strategy_with_gpu_and_cpu, + combinations.mirrored_strategy_with_two_gpus, + combinations.tpu_strategy_one_step] + + +def strategy_combinations(): return combinations.combine( - distribution=[combinations.default_strategy, - combinations.one_device_strategy, - combinations.mirrored_strategy_with_gpu_and_cpu, - combinations.mirrored_strategy_with_two_gpus, - combinations.tpu_strategy_one_step], + distribution=strategies, + mode=['graph']) + + +def strategy_and_optimizer_combinations(): + return combinations.combine( + distribution=strategies, + optimizer=[combinations.adagrad_optimizer_v1_fn, + combinations.adam_optimizer_v1_fn, + combinations.gradient_descent_optimizer_v1_fn, + combinations.rmsprop_optimizer_v1_fn], mode=['graph']) @@ -360,9 +389,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): def test_calling_model_with_numpy_arrays(self): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' @@ -392,23 +419,17 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): # with batch_size model.predict(inputs, batch_size=8) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def 
test_calling_model_on_same_dataset(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) + dataset = get_dataset(distribution) # Call fit with validation data model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, @@ -461,23 +482,17 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): model.fit(dataset_dict, epochs=1, steps_per_epoch=2, verbose=1) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_fit_eval_and_predict_methods_on_dataset(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' metrics = ['mae', keras.metrics.CategoricalAccuracy()] model.compile(optimizer, loss, metrics=metrics, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = batch_wrapper(dataset, 10, distribution) + dataset = get_dataset(distribution) model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) model.evaluate(dataset, steps=2, verbose=1) @@ -486,11 +501,23 @@ class TestWithDistributionStrategy(test.TestCase, 
parameterized.TestCase): model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0, validation_data=dataset, validation_steps=2) + @combinations.generate(strategy_and_optimizer_combinations()) + def test_fit_eval_and_predict_with_optimizer(self, distribution, optimizer): + with self.cached_session(): + model = get_model() + + loss = 'mse' + model.compile(optimizer(), loss, distribute=distribution) + + dataset = get_dataset(distribution) + + model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1) + model.evaluate(dataset, steps=2, verbose=1) + model.predict(dataset, steps=2) + def test_unsupported_features(self): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' @@ -500,11 +527,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) + dataset = get_dataset(strategy) # Test with validation split with self.assertRaisesRegexp( @@ -541,9 +564,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): def test_calling_with_unsupported_predefined_callbacks(self): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = gradient_descent.GradientDescentOptimizer(0.001) loss = 'mse' @@ -552,11 +573,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): '/device:GPU:0']) model.compile(optimizer, loss, metrics=metrics, distribute=strategy) - inputs = np.zeros((10, 3), dtype=np.float32) - 
targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) - dataset = dataset.repeat(100) - dataset = dataset.batch(10) + dataset = get_dataset(strategy) def schedule(_): return 0.001 @@ -580,9 +597,7 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): def test_dataset_input_shape_validation(self): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' @@ -616,17 +631,13 @@ class TestWithDistributionStrategy(test.TestCase, parameterized.TestCase): mode=['graph'])) def test_dataset_input_shape_fully_defined(self, distribution): with self.cached_session(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) + model = get_model() optimizer = rmsprop.RMSPropOptimizer(learning_rate=0.001) loss = 'mse' model.compile(optimizer, loss, distribute=distribution) - inputs = np.zeros((10, 3), dtype=np.float32) - targets = np.zeros((10, 4), dtype=np.float32) - dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets)) + dataset = get_dataset(distribution) # Input shapes are not fully known. Batch dimension is unknown as we are # not using the drop_remainder argument. 
dataset = dataset.repeat(100).batch(10) @@ -698,7 +709,7 @@ class LossMaskingWithDistributionStrategyTest(test.TestCase): class NormalizationLayerWithDistributionStrategyTest( test.TestCase, parameterized.TestCase): - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_batchnorm_correctness(self, distribution): with self.cached_session(): model = keras.models.Sequential() @@ -726,7 +737,7 @@ class NormalizationLayerWithDistributionStrategyTest( class CorrectnessWithDistributionStrategyTest(test.TestCase, parameterized.TestCase): - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_metric_correctness(self, distribution): with self.cached_session(): keras.backend.set_image_data_format('channels_last') @@ -756,7 +767,7 @@ class CorrectnessWithDistributionStrategyTest(test.TestCase, history = model.fit(x=train_dataset, epochs=1, steps_per_epoch=10) self.assertEqual(history.history['binary_accuracy'], [1.0]) - @combinations.generate(all_combinations()) + @combinations.generate(strategy_combinations()) def test_correctness(self, distribution): with self.cached_session(): keras.backend.set_image_data_format('channels_last') diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 46bffd7068..5091cac836 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -851,7 +851,8 @@ class Model(Network): # able to clone a Dataset on multiple workers we can remove this lambda. 
result = self._distribution_strategy.distribute_dataset(lambda: x) iterator = result.make_initializable_iterator() - K.get_session().run(iterator.initializer) + with self._distribution_strategy.scope(): + K.get_session().run(iterator.initializer) training_utils.validate_iterator_input(x, y, sample_weight, validation_split) diff --git a/tensorflow/python/keras/engine/training_distributed.py b/tensorflow/python/keras/engine/training_distributed.py index 1b64f904d5..a6470458d2 100644 --- a/tensorflow/python/keras/engine/training_distributed.py +++ b/tensorflow/python/keras/engine/training_distributed.py @@ -112,100 +112,99 @@ def fit_loop( dataset_targets = distributed_training_utils.flatten_perdevice_values( current_strategy, targets) - # Create a train function that is composed of all the parameters above. - distributed_train_function = K.Function( - all_inputs, all_outputs, - updates=all_updates, - name='distributed_train_function', - **all_session_args) - - # We need to set sample_weights to None since there are sample weight - # placeholders that are created with default values. - sample_weights = [None for _ in range(len(model.outputs) * - current_strategy.num_towers)] - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = dataset_inputs + dataset_targets + sample_weights + [1] - else: - ins = dataset_inputs + dataset_targets + # Create a train function that is composed of all the parameters above. + distributed_train_function = K.Function( + all_inputs, all_outputs, + updates=all_updates, + name='distributed_train_function', + **all_session_args) + + # We need to set sample_weights to None since there are sample weight + # placeholders that are created with default values. 
+ sample_weights = [None for _ in range(len(model.outputs) * + current_strategy.num_towers)] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = dataset_inputs + dataset_targets + sample_weights + [1] + else: + ins = dataset_inputs + dataset_targets - do_validation = False - if validation_steps: - do_validation = True + do_validation = False + if validation_steps: + do_validation = True - # Copy the weights from the original model to each of the replicated models. - orig_model_weights = model.get_weights() - with current_strategy.scope(): + # Copy the weights from the original model to each of the replicated models. + orig_model_weights = model.get_weights() distributed_model = current_strategy.unwrap(model._grouped_model)[0] distributed_training_utils.set_weights( current_strategy, distributed_model, orig_model_weights) - callbacks = cbks.configure_callbacks( - callbacks, - model, - do_validation=do_validation, - val_inputs=None, - val_targets=None, - epochs=epochs, - steps_per_epoch=steps_per_epoch, - verbose=verbose) - out_labels = model.metrics_names or [] - callbacks.on_train_begin() - - assert steps_per_epoch is not None - - for epoch in range(initial_epoch, epochs): - # Reset stateful metrics - for m in model.stateful_metric_functions: - m.reset_states() - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - for step_index in range(steps_per_epoch): - batch_logs = {'batch': step_index, 'size': 1} - callbacks.on_batch_begin(step_index, batch_logs) - try: - outs = distributed_train_function(ins) - except errors.OutOfRangeError: - logging.warning('Your dataset iterator ran out of data; ' - 'interrupting training. Make sure that your dataset ' - 'can generate at least `steps_per_epoch * epochs` ' - 'batches (in this case, %d batches).' 
% - steps_per_epoch * epochs) - break - - if not isinstance(outs, list): - outs = [outs] - - outs = _aggregate_metrics_across_towers(current_strategy.num_towers, - out_labels, - model.stateful_metric_names, outs) - for l, o in zip(out_labels, outs): - batch_logs[l] = o - callbacks.on_batch_end(step_index, batch_logs) + callbacks = cbks.configure_callbacks( + callbacks, + model, + do_validation=do_validation, + val_inputs=None, + val_targets=None, + epochs=epochs, + steps_per_epoch=steps_per_epoch, + verbose=verbose) + out_labels = model.metrics_names or [] + callbacks.on_train_begin() + + assert steps_per_epoch is not None + + for epoch in range(initial_epoch, epochs): + # Reset stateful metrics + for m in model.stateful_metric_functions: + m.reset_states() + callbacks.on_epoch_begin(epoch) + epoch_logs = {} + for step_index in range(steps_per_epoch): + batch_logs = {'batch': step_index, 'size': 1} + callbacks.on_batch_begin(step_index, batch_logs) + try: + outs = distributed_train_function(ins) + except errors.OutOfRangeError: + logging.warning('Your dataset iterator ran out of data; ' + 'interrupting training. Make sure that your dataset ' + 'can generate at least `steps_per_epoch * epochs` ' + 'batches (in this case, %d batches).' % + steps_per_epoch * epochs) + break + + if not isinstance(outs, list): + outs = [outs] + + outs = _aggregate_metrics_across_towers(current_strategy.num_towers, + out_labels, + model.stateful_metric_names, + outs) + for l, o in zip(out_labels, outs): + batch_logs[l] = o + callbacks.on_batch_end(step_index, batch_logs) + if callbacks.model.stop_training: + break + if do_validation: + val_outs = test_loop( + model, + val_iterator, + steps=validation_steps, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. 
+ for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + + callbacks.on_epoch_end(epoch, epoch_logs) if callbacks.model.stop_training: break - if do_validation: - val_outs = test_loop( - model, - val_iterator, - steps=validation_steps, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o + callbacks.on_train_end() - callbacks.on_epoch_end(epoch, epoch_logs) - if callbacks.model.stop_training: - break - callbacks.on_train_end() - - # Copy the weights back from the replicated model to the original model. - with current_strategy.scope(): + # Copy the weights back from the replicated model to the original model. updated_weights = current_strategy.unwrap( model._grouped_model)[0].get_weights() model.set_weights(updated_weights) - return model.history + return model.history def _experimental_fit_loop( @@ -427,66 +426,65 @@ def test_loop(model, iterator, verbose=0, steps=None): dataset_targets = distributed_training_utils.flatten_perdevice_values( current_strategy, targets) - distributed_test_function = K.Function( - all_inputs, all_outputs, - updates=all_updates, - name='distributed_test_function', - **all_session_args) - - # We need to set sample_weights to None since there are sample weight - # placeholders that are created with default values. 
- sample_weights = [None for _ in range(len(model.outputs) * - current_strategy.num_towers)] - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = dataset_inputs + dataset_targets + sample_weights + [0] - else: - ins = dataset_inputs + dataset_targets + distributed_test_function = K.Function( + all_inputs, all_outputs, + updates=all_updates, + name='distributed_test_function', + **all_session_args) - for m in model.stateful_metric_functions: - m.reset_states() - stateful_metric_indices = [ - i for i, name in enumerate(model.metrics_names) - if str(name) in model.stateful_metric_names - ] + # We need to set sample_weights to None since there are sample weight + # placeholders that are created with default values. + sample_weights = [None for _ in range(len(model.outputs) * + current_strategy.num_towers)] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = dataset_inputs + dataset_targets + sample_weights + [0] + else: + ins = dataset_inputs + dataset_targets - outs = [] - if verbose == 1: - progbar = Progbar(target=steps) + for m in model.stateful_metric_functions: + m.reset_states() + stateful_metric_indices = [ + i for i, name in enumerate(model.metrics_names) + if str(name) in model.stateful_metric_names + ] - # Copy the weights from the original model to each of the replicated models. - orig_model_weights = model.get_weights() - with current_strategy.scope(): + outs = [] + if verbose == 1: + progbar = Progbar(target=steps) + + # Copy the weights from the original model to each of the replicated models. 
+ orig_model_weights = model.get_weights() distributed_model = current_strategy.unwrap(model._grouped_model)[0] distributed_training_utils.set_weights( current_strategy, distributed_model, orig_model_weights) - assert steps is not None - for step in range(steps): - batch_outs = distributed_test_function(ins) - batch_outs = _aggregate_metrics_across_towers( - current_strategy.num_towers, model.metrics_names, - model.stateful_metric_names, batch_outs) - if isinstance(batch_outs, list): - if step == 0: - outs = [0.] * len(batch_outs) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out - else: - if step == 0: - outs.append(0.) - outs[0] += batch_outs - if verbose >= 1: - progbar.update(step + 1) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= steps + assert steps is not None + for step in range(steps): + batch_outs = distributed_test_function(ins) + batch_outs = _aggregate_metrics_across_towers( + current_strategy.num_towers, model.metrics_names, + model.stateful_metric_names, batch_outs) + if isinstance(batch_outs, list): + if step == 0: + outs = [0.] * len(batch_outs) + for i, batch_out in enumerate(batch_outs): + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out + else: + if step == 0: + outs.append(0.) 
+ outs[0] += batch_outs + if verbose >= 1: + progbar.update(step + 1) + for i in range(len(outs)): + if i not in stateful_metric_indices: + outs[i] /= steps - if len(outs) == 1: - return outs[0] - return outs + if len(outs) == 1: + return outs[0] + return outs def _experimental_test_loop(model, iterator, verbose=0, steps=None): @@ -647,51 +645,50 @@ def predict_loop(model, iterator, verbose=0, steps=None): dataset_inputs = distributed_training_utils.flatten_perdevice_values( current_strategy, inputs) - distributed_predict_function = K.Function( - all_inputs, all_outputs, - updates=all_updates, - name='distributed_predict_function', - **all_session_args) + distributed_predict_function = K.Function( + all_inputs, all_outputs, + updates=all_updates, + name='distributed_predict_function', + **all_session_args) - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = dataset_inputs + [0] - else: - ins = dataset_inputs + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = dataset_inputs + [0] + else: + ins = dataset_inputs - if verbose == 1: - progbar = Progbar(target=steps) + if verbose == 1: + progbar = Progbar(target=steps) - # Copy the weights from the original model to each of the replicated models. - orig_model_weights = model.get_weights() - with current_strategy.scope(): + # Copy the weights from the original model to each of the replicated models. + orig_model_weights = model.get_weights() distributed_model = current_strategy.unwrap(model._grouped_model)[0] distributed_training_utils.set_weights( current_strategy, distributed_model, orig_model_weights) - if steps is not None: - # Since we do not know how many samples we will see, we cannot pre-allocate - # the returned Numpy arrays. Instead, we store one array per batch seen - # and concatenate them upon returning. 
- unconcatenated_outs = [] - for step in range(steps): - batch_outs = distributed_predict_function(ins) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if step == 0: - for _ in batch_outs: - unconcatenated_outs.append([]) - # TODO(anjalisridhar): Should combine the outputs from multiple towers - # correctly here. - for i, batch_out in enumerate(batch_outs): - unconcatenated_outs[i].append(batch_out) - if verbose >= 1: - progbar.update(step + 1) - if len(unconcatenated_outs) == 1: - return np.concatenate(unconcatenated_outs[0], axis=0) - return [ - np.concatenate(unconcatenated_outs[i], axis=0) - for i in range(len(unconcatenated_outs)) - ] + if steps is not None: + # Since we do not know how many samples we will see, we cannot + # pre-allocate the returned Numpy arrays. Instead, we store one array per + # batch seen and concatenate them upon returning. + unconcatenated_outs = [] + for step in range(steps): + batch_outs = distributed_predict_function(ins) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if step == 0: + for _ in batch_outs: + unconcatenated_outs.append([]) + # TODO(anjalisridhar): Should combine the outputs from multiple towers + # correctly here. + for i, batch_out in enumerate(batch_outs): + unconcatenated_outs[i].append(batch_out) + if verbose >= 1: + progbar.update(step + 1) + if len(unconcatenated_outs) == 1: + return np.concatenate(unconcatenated_outs[0], axis=0) + return [ + np.concatenate(unconcatenated_outs[i], axis=0) + for i in range(len(unconcatenated_outs)) + ] def _experimental_predict_loop(model, iterator, verbose=0, steps=None): -- GitLab From fa8c1eabd06f3043be820bf476e8413818853f17 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 28 Sep 2018 00:04:20 -0700 Subject: [PATCH 130/570] Internal PiperOrigin-RevId: 214895147 --- tensorflow/examples/android/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/examples/android/BUILD b/tensorflow/examples/android/BUILD index f327b645f5..f5f0d7c3c8 100644 --- a/tensorflow/examples/android/BUILD +++ b/tensorflow/examples/android/BUILD @@ -68,6 +68,7 @@ android_binary( srcs = glob([ "src/**/*.java", ]), + aapt_version = "aapt", # Package assets from assets dir as well as all model targets. Remove undesired models # (and corresponding Activities in source) to reduce APK size. assets = [ -- GitLab From d0690d46466bf0393ad65544d1e8c55e948df133 Mon Sep 17 00:00:00 2001 From: EFanZh Date: Fri, 28 Sep 2018 15:20:26 +0800 Subject: [PATCH 131/570] Fix some documentation errors --- tensorflow/contrib/distribute/python/mirrored_strategy.py | 5 +++-- tensorflow/python/keras/engine/training.py | 2 +- tensorflow/python/training/distribute.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/distribute/python/mirrored_strategy.py b/tensorflow/contrib/distribute/python/mirrored_strategy.py index 504f45a695..c0861da567 100644 --- a/tensorflow/contrib/distribute/python/mirrored_strategy.py +++ b/tensorflow/contrib/distribute/python/mirrored_strategy.py @@ -318,12 +318,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy): [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed). The distribution strategy inherits these concepts as well and in addition to that we also clarify several more concepts: - * **In-graph replication**: the `client` creates a single `tf.Graph` that + + * **In-graph replication**: the `client` creates a single `tf.Graph` that specifies tasks for devices on all workers. The `client` then creates a client session which will talk to the `master` service of a `worker`. 
Then the `master` will partition the graph and distribute the work to all participating workers. - * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one + * **Worker**: A `worker` is a TensorFlow `task` that usually maps to one physical machine. We will have multiple `worker`s with different `task` index. They all do similar things except for one worker checkpointing model variables, writing summaries, etc. in addition to its ordinary work. diff --git a/tensorflow/python/keras/engine/training.py b/tensorflow/python/keras/engine/training.py index 5091cac836..1bd8422658 100644 --- a/tensorflow/python/keras/engine/training.py +++ b/tensorflow/python/keras/engine/training.py @@ -2356,6 +2356,6 @@ class DistributedCallbackModel(Model): # Whitelisted atttributes of the model that can be accessed by the user # during a callback. if item not in ['_setattr_tracking']: - logging.warning('You are accessing attribute ' + item + 'of the ' + logging.warning('You are accessing attribute ' + item + ' of the ' 'DistributedCallbackModel that may not have been set ' 'correctly.') diff --git a/tensorflow/python/training/distribute.py b/tensorflow/python/training/distribute.py index 419a9ec12b..fd4704285c 100644 --- a/tensorflow/python/training/distribute.py +++ b/tensorflow/python/training/distribute.py @@ -631,7 +631,7 @@ class DistributionStrategy(object): Args: fn: function to run using this distribution strategy. The function must - have the following signature: def fn(context, *inputs). + have the following signature: `def fn(context, *inputs)`. `context` is an instance of `MultiStepContext` that will be passed when `fn` is run. `context` can be used to specify the outputs to be returned from `fn` by calling `context.set_last_step_output`. It can also be used @@ -797,9 +797,9 @@ class DistributionStrategy(object): return merged(results) ``` - Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`.' 
+ Otherwise this returns `fn(var, *args, **kwargs)` colocated with `var`. - Neither *args nor **kwargs may contain per-device values. + Neither `*args` nor `**kwargs` may contain per-device values. If they contain mirrored values, they will be unwrapped before calling `fn`. -- GitLab From 19b2383cc0e221262be0780180558cf5bbb3e37e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 28 Sep 2018 02:01:03 -0700 Subject: [PATCH 132/570] compat: Update forward compatibility horizon to 2018-09-28 PiperOrigin-RevId: 214904795 --- tensorflow/python/compat/compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/compat/compat.py b/tensorflow/python/compat/compat.py index 88cad5d6d9..b74fce3a4c 100644 --- a/tensorflow/python/compat/compat.py +++ b/tensorflow/python/compat/compat.py @@ -26,7 +26,7 @@ import datetime from tensorflow.python.util import tf_contextlib from tensorflow.python.util.tf_export import tf_export -_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 27) +_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 9, 28) @tf_export("compat.forward_compatible") -- GitLab From 8eb27871583d9fc61e046493acaa0df2839bc1c7 Mon Sep 17 00:00:00 2001 From: wangsiyu Date: Fri, 28 Sep 2018 18:51:34 +0800 Subject: [PATCH 133/570] remove slash --- tensorflow/python/ops/variables.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 69f63bc8e6..262cd61e5a 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -2401,7 +2401,8 @@ class PartitionedVariable(object): partition_axes = self._partition_axes() if len(partition_axes) > 1: raise NotImplementedError( - "Multi-axis partition assign_fn is not supported " + "Cannot do assign action along more than one dimension: %s. 
" + "Multi-axis partition assign action is not supported " % str(partition_axes)) partition_ix = partition_axes[0] size_splits_list = [ @@ -2409,7 +2410,7 @@ class PartitionedVariable(object): value_list = array_ops.split( value, size_splits_list, axis=partition_ix) op_list = [ - assign_fn(var, value_list[idx], idx) \ + assign_fn(var, value_list[idx], idx) for idx, var in enumerate(self._variable_list)] return op_list -- GitLab From 32627bfba19606d3c3a34f5d02ae9428675bbc42 Mon Sep 17 00:00:00 2001 From: Todd Wang Date: Fri, 28 Sep 2018 07:28:19 -0700 Subject: [PATCH 134/570] Allow testManyCPUs to encounter non-CPU devices. PiperOrigin-RevId: 214932861 --- tensorflow/python/client/session_test.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 5c0c405306..347833ce8f 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -120,11 +120,17 @@ class SessionTest(test_util.TensorFlowTestCase): inp = constant_op.constant(10.0, name='W1') self.assertAllEqual(inp.eval(), 10.0) - devices = sess.list_devices() - self.assertEqual(2, len(devices)) - for device in devices: - self.assertEqual('CPU', framework_device_lib.DeviceSpec.from_string( - device.name).device_type) + num_cpu_devices = 0 + num_gpu_devices = 0 + for device in sess.list_devices(): + device_type = framework_device_lib.DeviceSpec.from_string( + device.name).device_type + if device_type == 'CPU': + num_cpu_devices += 1 + elif device_type == 'GPU': + num_gpu_devices += 1 + self.assertEqual(2, num_cpu_devices) + self.assertEqual(0, num_gpu_devices) def testPerSessionThreads(self): with session.Session( -- GitLab From 4e955be2ae1c920623778c15357129fea9a3bdab Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Fri, 28 Sep 2018 08:26:55 -0700 Subject: [PATCH 135/570] Fixing a couple of small bugs with the multi device iterator having to deal with the 
case when the background thread terminated because the iterator finished and yet some other requests were coming in. 1. The GetNextFromShard would see an empty buffer and return cancelled instead of OutOfRange errors 2. On shutdown, we weren't calling all the pending callbacks. Tested with runs_per_test=5000 PiperOrigin-RevId: 214939274 --- .../kernels/data/multi_device_iterator_ops.cc | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc index 5f143967d9..d909b9e9d3 100644 --- a/tensorflow/core/kernels/data/multi_device_iterator_ops.cc +++ b/tensorflow/core/kernels/data/multi_device_iterator_ops.cc @@ -134,19 +134,17 @@ class MultiDeviceIterator : public ResourceBase { void Reset() LOCKS_EXCLUDED(mu_) { { mutex_lock l(mu_); - if (background_thread_finished_) { - return; - } - - cancelled_ = true; - // Wake up the background thread. - for (int i = 0; i < size_; ++i) { - buffer_[i].cond_var.notify_all(); - } + if (!background_thread_finished_) { + cancelled_ = true; + // Wake up the background thread. + for (int i = 0; i < size_; ++i) { + buffer_[i].cond_var.notify_all(); + } - // Make sure background thread has finished first. - while (!background_thread_finished_) { - shutdown_cond_var_.wait(l); + // Make sure background thread has finished first. 
+ while (!background_thread_finished_) { + shutdown_cond_var_.wait(l); + } } } RunPendingCallbacks(); @@ -182,7 +180,7 @@ class MultiDeviceIterator : public ResourceBase { buffer_[shard_num].cond_var.notify_all(); } } else { - if (background_thread_finished_) { + if (end_of_iterator_) { produced_output = true; elem.end_of_sequence = true; } else { @@ -219,8 +217,12 @@ class MultiDeviceIterator : public ResourceBase { while (!buffer_[i].callbacks.empty()) { if (buffer_[i].data.empty()) { HostBufferElement elem; - elem.status = - errors::Cancelled("Cancelled and buffer not filled."); + if (end_of_iterator_) { + elem.end_of_sequence = true; + } else { + elem.status = + errors::Cancelled("Cancelled and buffer not filled."); + } cancellation_elements.push_back(std::move(elem)); } else { cancellation_elements.push_back( @@ -293,6 +295,7 @@ class MultiDeviceIterator : public ResourceBase { { mutex_lock l(mu_); background_thread_finished_ = true; + end_of_iterator_ = true; shutdown_cond_var_.notify_all(); } RunPendingCallbacks(); @@ -312,6 +315,7 @@ class MultiDeviceIterator : public ResourceBase { std::unique_ptr background_thread_ GUARDED_BY(mu_); bool background_thread_finished_ GUARDED_BY(mu_) = false; bool background_thread_started_ GUARDED_BY(mu_) = false; + bool end_of_iterator_ GUARDED_BY(mu_) = false; bool cancelled_ GUARDED_BY(mu_) = false; condition_variable shutdown_cond_var_ GUARDED_BY(mu_); -- GitLab From a74a3217f7ff2dbee2fb618aa658cf666861545c Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Sat, 4 Aug 2018 14:13:00 +0800 Subject: [PATCH 136/570] Move bazel.rc to workspace root to support bazel-0.18.0 Bazel 0.18.0 will contain a change for which rc files it accepts. https://github.com/bazelbuild/bazel/commit/ec83598cb6ee4136166bb562a24dc5dfa58921db https://github.com/bazelbuild/bazel/issues/4502 Old bazel used to read %workspace%/tools/bazel.rc. New bazel will not read that and instead will only read %workspace%/.bazelrc. 
Signed-off-by: Jason Zaman --- tools/bazel.rc => .bazelrc | 4 +++- .gitignore | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) rename tools/bazel.rc => .bazelrc (98%) diff --git a/tools/bazel.rc b/.bazelrc similarity index 98% rename from tools/bazel.rc rename to .bazelrc index 3734fab715..9f09fdff97 100644 --- a/tools/bazel.rc +++ b/.bazelrc @@ -29,7 +29,7 @@ build:mkl -c opt # This config option is used to enable MKL-DNN open source library only, # without depending on MKL binary version. -build:mkl_open_source_only --define=build_with_mkl_dnn_only=true +build:mkl_open_source_only --define=build_with_mkl_dnn_only=true build:mkl_open_source_only --define=build_with_mkl=true --define=enable_mkl=true build:download_clang --crosstool_top=@local_config_download_clang//:toolchain @@ -84,3 +84,5 @@ build:dynamic_kernels --define=dynamic_loaded_kernels=true build --define=PREFIX=/usr build --define=LIBDIR=$(PREFIX)/lib build --define=INCLUDEDIR=$(PREFIX)/include + +# Do not commit the tf_configure.bazelrc line diff --git a/.gitignore b/.gitignore index 1ef4c297ee..cb65f447d4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ .DS_Store .ipynb_checkpoints node_modules -/.bazelrc /.tf_configure.bazelrc /bazel-* /bazel_pip -- GitLab From d3f6b72bc7356d5c94289e32426dc482b8ededf0 Mon Sep 17 00:00:00 2001 From: Jason Zaman Date: Sat, 4 Aug 2018 14:28:02 +0800 Subject: [PATCH 137/570] configure: use workspace-relative path to tf_configure_bazelrc /.bazelrc is not gitignored anymore so this should help in case the import line is accidentally committed. Bazel 0.18.0 will support a new 'try-import' statement that should be used once 0.18.0 has been out long enough. 
Signed-off-by: Jason Zaman --- configure.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/configure.py b/configure.py index 55fce8b93b..129d9c5fe7 100644 --- a/configure.py +++ b/configure.py @@ -257,11 +257,7 @@ def reset_tf_configure_bazelrc(workspace_path): if _TF_BAZELRC_FILENAME in l: continue f.write('%s\n' % l) - if is_windows(): - tf_bazelrc_path = _TF_BAZELRC.replace('\\', '/') - else: - tf_bazelrc_path = _TF_BAZELRC - f.write('import %s\n' % tf_bazelrc_path) + f.write('import %%workspace%%/%s\n' % _TF_BAZELRC_FILENAME) def cleanup_makefile(): -- GitLab From e06783e7bb80f664c7ec9be90680ac6ddcbd598f Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Fri, 28 Sep 2018 08:38:25 -0700 Subject: [PATCH 138/570] Fix a latex render nit PiperOrigin-RevId: 214940748 --- tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt b/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt index 40d7d371ca..7142a0e3f2 100644 --- a/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Igamma.pbtxt @@ -9,7 +9,7 @@ The lower regularized incomplete Gamma function is defined as: where -\\(gamma(a, x) = int_{0}^{x} t^{a-1} exp(-t) dt\\) +\\(gamma(a, x) = \\int_{0}^{x} t^{a-1} exp(-t) dt\\) is the lower incomplete Gamma function. -- GitLab From c7bb3c3d65e4e064d53630d4b524522eed6f3f44 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 28 Sep 2018 08:38:53 -0700 Subject: [PATCH 139/570] [tf.data] Move `tf.contrib.data` C++ code to a core "experimental" directory. NOTE: All ops and kernels previously previously defined in tensorflow/contrib/data have had their name prefixed with "Experimental" to indicate that they are not (yet) stable, and thus not subject to backwards or forwards compatibility guarantees. 
PiperOrigin-RevId: 214940819 --- tensorflow/contrib/BUILD | 3 - tensorflow/contrib/cmake/python_modules.txt | 1 - tensorflow/contrib/data/BUILD | 38 ----- .../contrib/data/ops/indexed_dataset_ops.cc | 80 --------- .../contrib/data/python/kernel_tests/BUILD | 3 +- .../kernel_tests/indexed_dataset_ops_test.py | 12 +- tensorflow/contrib/data/python/ops/BUILD | 57 +------ .../data/python/ops/contrib_op_loader.py | 24 --- .../contrib/data/python/ops/error_ops.py | 5 +- .../data/python/ops/indexed_dataset_ops.py | 25 +-- .../contrib/data/python/ops/interleave_ops.py | 13 +- .../contrib/data/python/ops/optimization.py | 5 +- .../data/python/ops/prefetching_ops.py | 37 ++-- tensorflow/contrib/data/python/ops/readers.py | 6 +- .../contrib/data/python/ops/threadpool.py | 9 +- tensorflow/contrib/data/python/ops/unique.py | 5 +- tensorflow/core/BUILD | 2 + ...pi_def_ExperimentalAssertNextDataset.pbtxt | 4 + .../api_def_ExperimentalCSVDataset.pbtxt | 4 + ...xperimentalDirectedInterleaveDataset.pbtxt | 21 +++ ...xperimentalFunctionBufferingResource.pbtxt | 58 +++++++ ...ntalFunctionBufferingResourceGetNext.pbtxt | 25 +++ ...mentalFunctionBufferingResourceReset.pbtxt | 13 ++ ...f_ExperimentalIdentityIndexedDataset.pbtxt | 4 + ..._def_ExperimentalIgnoreErrorsDataset.pbtxt | 8 + ...pi_def_ExperimentalIndexedDatasetGet.pbtxt | 4 + ...xperimentalIndexedDatasetMaterialize.pbtxt | 4 + ...pi_def_ExperimentalIteratorGetDevice.pbtxt | 8 + .../api_def_ExperimentalLMDBDataset.pbtxt | 4 + ...mentalMaterializedIndexDatasetHandle.pbtxt | 4 + ...pi_def_ExperimentalThreadPoolDataset.pbtxt | 13 ++ ...api_def_ExperimentalThreadPoolHandle.pbtxt | 35 ++++ .../api_def_ExperimentalUniqueDataset.pbtxt | 8 + tensorflow/core/kernels/data/BUILD | 1 + .../kernels/data/experimental}/BUILD | 90 +++++----- .../experimental}/assert_next_dataset_op.cc | 5 +- .../data/experimental}/csv_dataset_op.cc | 3 +- .../directed_interleave_dataset_op.cc | 5 +- .../experimental}/identity_indexed_dataset.cc | 7 +- 
.../experimental}/ignore_errors_dataset_op.cc | 6 +- .../data/experimental}/indexed_dataset.cc | 14 +- .../data/experimental}/indexed_dataset.h | 6 +- .../data/experimental}/lmdb_dataset_op.cc | 3 +- .../data/experimental}/prefetching_kernels.cc | 23 +-- .../experimental}/threadpool_dataset_op.cc | 7 +- .../data/experimental}/unique_dataset_op.cc | 7 +- .../ops/experimental_dataset_ops.cc} | 161 +++++++++--------- tensorflow/python/BUILD | 9 + tensorflow/tools/pip_package/BUILD | 1 - 49 files changed, 469 insertions(+), 421 deletions(-) delete mode 100644 tensorflow/contrib/data/ops/indexed_dataset_ops.cc delete mode 100644 tensorflow/contrib/data/python/ops/contrib_op_loader.py create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResource.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceGetNext.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalFunctionBufferingResourceReset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIdentityIndexedDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIgnoreErrorsDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetGet.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIndexedDatasetMaterialize.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalIteratorGetDevice.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalLMDBDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalMaterializedIndexDatasetHandle.pbtxt 
create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolDataset.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalThreadPoolHandle.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_ExperimentalUniqueDataset.pbtxt rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/BUILD (52%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/assert_next_dataset_op.cc (97%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/csv_dataset_op.cc (99%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/directed_interleave_dataset_op.cc (98%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/identity_indexed_dataset.cc (96%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/ignore_errors_dataset_op.cc (96%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/indexed_dataset.cc (97%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/indexed_dataset.h (95%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/lmdb_dataset_op.cc (98%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/prefetching_kernels.cc (95%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/threadpool_dataset_op.cc (97%) rename tensorflow/{contrib/data/kernels => core/kernels/data/experimental}/unique_dataset_op.cc (97%) rename tensorflow/{contrib/data/ops/dataset_ops.cc => core/ops/experimental_dataset_ops.cc} (62%) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 1a9ae8ac3a..98dff965a9 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -132,7 +132,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_kernels", "//tensorflow/contrib/coder:all_kernels", - "//tensorflow/contrib/data/kernels:dataset_kernels", 
"//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/hadoop:dataset_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", @@ -163,8 +162,6 @@ cc_library( deps = [ "//tensorflow/contrib/boosted_trees:boosted_trees_ops_op_lib", "//tensorflow/contrib/coder:all_ops", - "//tensorflow/contrib/data:dataset_ops_op_lib", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/hadoop:dataset_ops_op_lib", diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index c0763f4c0e..2975b167ec 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -132,7 +132,6 @@ tensorflow/contrib/cudnn_rnn/python tensorflow/contrib/cudnn_rnn/python/layers tensorflow/contrib/cudnn_rnn/python/ops tensorflow/contrib/data -tensorflow/contrib/data/kernels tensorflow/contrib/data/python tensorflow/contrib/data/python/kernel_tests tensorflow/contrib/data/python/kernel_tests/serialization diff --git a/tensorflow/contrib/data/BUILD b/tensorflow/contrib/data/BUILD index 9f710613dd..38f1c65a4d 100644 --- a/tensorflow/contrib/data/BUILD +++ b/tensorflow/contrib/data/BUILD @@ -4,17 +4,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load( - "//tensorflow:tensorflow.bzl", - "tf_custom_op_library", - "tf_gen_op_libs", - "if_not_windows", -) -load( - "//tensorflow/core:platform/default/build_config_root.bzl", - "if_static", -) - py_library( name = "data", srcs = ["__init__.py"], @@ -25,30 +14,3 @@ py_library( "//tensorflow/python:util", ], ) - -cc_library( - name = "lib_proto_parsing_for_dataset_ops", - deps = if_not_windows(["//tensorflow/core:lib_proto_parsing"]), -) - -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = [ - "ops/dataset_ops.cc", - "ops/indexed_dataset_ops.cc", - ], - deps = [ - 
"//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/data/kernels:indexed_dataset", - ] + if_static( - extra_deps = [":lib_proto_parsing_for_dataset_ops"], - otherwise = [], - ), -) - -tf_gen_op_libs( - op_lib_names = [ - "dataset_ops", - "indexed_dataset_ops", - ], -) diff --git a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc b/tensorflow/contrib/data/ops/indexed_dataset_ops.cc deleted file mode 100644 index cd9b7c68a0..0000000000 --- a/tensorflow/contrib/data/ops/indexed_dataset_ops.cc +++ /dev/null @@ -1,80 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" - -namespace tensorflow { - -REGISTER_OP("IdentityIndexedDataset") - .Input("size: uint64") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn( - shape_inference::ScalarShape); // TODO(saeta): check input shapes. - -/////////////////////////////////////////////////////////////////////////////// -// IndexedDataset Internals -/////////////////////////////////////////////////////////////////////////////// - -// Creates the handle. 
-REGISTER_OP("MaterializedIndexDatasetHandle") - .Output("handle: resource") - .Attr("container: string") - .Attr("shared_name: string") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); - -// Actually materialize the materialize handle. -REGISTER_OP("IndexedDatasetMaterialize") - .Input("dataset: variant") - .Input("materialized: resource") - .SetShapeFn(shape_inference::NoOutputs); - -namespace { - -Status GetShapeFn(shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - std::vector<PartialTensorShape> output_shapes; - TF_RETURN_IF_ERROR(c->GetAttr("output_shapes", &output_shapes)); - if (output_shapes.size() != c->num_outputs()) { - return errors::InvalidArgument( - "`output_shapes` must be the same length as `output_types` (", - output_shapes.size(), " vs. ", c->num_outputs()); - } - for (size_t i = 0; i < output_shapes.size(); ++i) { - shape_inference::ShapeHandle output_shape_handle; - TF_RETURN_IF_ERROR(c->MakeShapeFromPartialTensorShape( - output_shapes[i], &output_shape_handle)); - c->set_output(static_cast<int>(i), output_shape_handle); - } - return Status::OK(); -} - -} // namespace - -REGISTER_OP("IndexedDatasetGet") - .Input("materialized: resource") - .Input("index: uint64") - .Output("components: output_types") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(GetShapeFn) - .Doc(R"doc( -Gets the element at `index` from `materialized` IndexedDataset. 
-)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index ce52c990ce..21ac40eb21 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -139,12 +139,11 @@ py_test( name = "indexed_dataset_ops_test", srcs = ["indexed_dataset_ops_test.py"], deps = [ - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/data/python/ops:gen_dataset_ops", "//tensorflow/contrib/data/python/ops:indexed_dataset_ops", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python/data/ops:dataset_ops", "//third_party/py/numpy", ], diff --git a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py index 9c508d686d..46a7127b52 100644 --- a/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/indexed_dataset_ops_test.py @@ -19,29 +19,29 @@ from __future__ import print_function import unittest -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import indexed_dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops from tensorflow.python.platform import test class IndexedDatasetOpsTest(test.TestCase): def testLowLevelIndexedDatasetOps(self): - identity = gen_dataset_ops.identity_indexed_dataset( + identity = ged_ops.experimental_identity_indexed_dataset( 
ops.convert_to_tensor(16, dtype=dtypes.uint64)) - handle = gen_dataset_ops.materialized_index_dataset_handle( + handle = ged_ops.experimental_materialized_index_dataset_handle( container="", shared_name="", output_types=[dtypes.uint64], output_shapes=[[]]) - materialize = gen_dataset_ops.indexed_dataset_materialize(identity, handle) + materialize = ged_ops.experimental_indexed_dataset_materialize( + identity, handle) index = array_ops.placeholder(dtypes.uint64) - get_op = gen_dataset_ops.indexed_dataset_get( + get_op = ged_ops.experimental_indexed_dataset_get( handle, index, output_types=[dtypes.uint64], output_shapes=[[]]) with self.cached_session() as sess: diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index a14781cd93..5cd1ed542b 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -78,7 +78,6 @@ py_library( srcs_version = "PY2AND3", deps = [ ":batching", - ":gen_dataset_ops", ":interleave_ops", ":optimization", ":parsing_ops", @@ -86,6 +85,7 @@ py_library( "//tensorflow/python:constant_op", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python:lib", "//tensorflow/python:platform", @@ -148,8 +148,7 @@ py_library( srcs = ["error_ops.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", @@ -179,12 +178,11 @@ py_library( srcs = ["interleave_ops.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", ":random_ops", "//tensorflow/contrib/stateless", "//tensorflow/python:array_ops", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", 
"//tensorflow/python:math_ops", "//tensorflow/python:util", @@ -199,9 +197,8 @@ py_library( srcs = ["optimization.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", @@ -304,8 +301,7 @@ py_library( srcs = ["threadpool.py"], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:resource_variable_ops", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", @@ -321,9 +317,8 @@ py_library( ], srcs_version = "PY2AND3", deps = [ - ":contrib_op_loader", - ":gen_dataset_ops", "//tensorflow/python:dtypes", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", @@ -342,47 +337,11 @@ py_library( ], ) -tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "gen_dataset_ops.py", - deps = [ - "//tensorflow/contrib/data:dataset_ops_op_lib", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", - ], -) - -tf_kernel_library( - name = "dataset_ops_kernels", - deps = [ - "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/core:framework", - ], - alwayslink = 1, -) - -tf_custom_op_py_library( - name = "contrib_op_loader", - srcs = ["contrib_op_loader.py"], - dso = ["//tensorflow/contrib/data:_dataset_ops.so"], - kernels = [ - ":dataset_ops_kernels", - "//tensorflow/contrib/data:indexed_dataset_ops_op_lib", - "//tensorflow/contrib/data:dataset_ops_op_lib", - ], - srcs_version = "PY2AND3", - deps = [ - ":gen_dataset_ops", - "//tensorflow/contrib/util:util_py", - "//tensorflow/python:platform", - ], -) - py_library( name = "indexed_dataset_ops", srcs = ["indexed_dataset_ops.py"], deps = [ 
- ":contrib_op_loader", - ":gen_dataset_ops", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", @@ -394,7 +353,7 @@ py_library( name = "prefetching_ops", srcs = ["prefetching_ops.py"], deps = [ - ":contrib_op_loader", + "//tensorflow/python:experimental_dataset_ops_gen", "//tensorflow/python:framework_ops", "//tensorflow/python/data/ops:dataset_ops", "//tensorflow/python/data/util:nest", diff --git a/tensorflow/contrib/data/python/ops/contrib_op_loader.py b/tensorflow/contrib/data/python/ops/contrib_op_loader.py deleted file mode 100644 index 8f495a9dc9..0000000000 --- a/tensorflow/contrib/data/python/ops/contrib_op_loader.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Python helper for loading contrib ops and kernels.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.util import loader -from tensorflow.python.platform import resource_loader - -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/data/python/ops/error_ops.py b/tensorflow/contrib/data/python/ops/error_ops.py index 615dbcabd4..f962e623ee 100644 --- a/tensorflow/contrib/data/python/ops/error_ops.py +++ b/tensorflow/contrib/data/python/ops/error_ops.py @@ -17,9 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.ops import gen_experimental_dataset_ops def ignore_errors(): @@ -60,7 +59,7 @@ class _IgnoreErrorsDataset(dataset_ops.UnaryDataset): self._input_dataset = input_dataset def _as_variant_tensor(self): - return gen_dataset_ops.ignore_errors_dataset( + return gen_experimental_dataset_ops.experimental_ignore_errors_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access **dataset_ops.flat_structure(self)) diff --git a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py index cc76ab0850..9c06474a2f 100644 --- a/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/indexed_dataset_ops.py @@ -19,14 +19,13 @@ from __future__ import print_function import abc -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from 
tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops class MaterializedIndexedDataset(object): @@ -57,7 +56,7 @@ class MaterializedIndexedDataset(object): A tensor containing the values corresponding to `index`. """ # TODO(saeta): nest.pack_sequence_as(...) - return gen_dataset_ops.indexed_dataset_get( + return ged_ops.experimental_indexed_dataset_get( self._materialized_resource, index, output_types=nest.flatten( @@ -90,16 +89,18 @@ class IndexedDataset(dataset_ops.Dataset): container = "" if shared_name is None: shared_name = "" - materialized_resource = gen_dataset_ops.materialized_index_dataset_handle( - container=container, - shared_name=shared_name, - output_types=nest.flatten( - sparse.as_dense_types(self.output_types, self.output_classes)), - output_shapes=nest.flatten( - sparse.as_dense_types(self.output_shapes, self.output_classes))) + materialized_resource = ( + ged_ops.experimental_materialized_index_dataset_handle( + container=container, + shared_name=shared_name, + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_types(self.output_shapes, + self.output_classes)))) with ops.colocate_with(materialized_resource): - materializer = gen_dataset_ops.indexed_dataset_materialize( + materializer = ged_ops.experimental_indexed_dataset_materialize( self._as_variant_tensor(), materialized_resource) return MaterializedIndexedDataset(materialized_resource, materializer, self.output_classes, self.output_types, @@ -170,7 +171,7 @@ class IdentityIndexedDataset(IndexedDataset): return tensor_shape.scalar() def 
_as_variant_tensor(self): - return gen_dataset_ops.identity_indexed_dataset(self._size) + return ged_ops.experimental_identity_indexed_dataset(self._size) def _inputs(self): return [] diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index bfa3fdf543..1ee9db1aa8 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib import stateless -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.contrib.data.python.ops import random_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import readers @@ -28,6 +26,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_experimental_dataset_ops from tensorflow.python.ops import math_ops from tensorflow.python.util import deprecation @@ -167,10 +166,12 @@ class _DirectedInterleaveDataset(dataset_ops.Dataset): def _as_variant_tensor(self): # pylint: disable=protected-access - return gen_dataset_ops.directed_interleave_dataset( - self._selector_input._as_variant_tensor(), - [data_input._as_variant_tensor() for data_input in self._data_inputs], - **dataset_ops.flat_structure(self)) + return ( + gen_experimental_dataset_ops.experimental_directed_interleave_dataset( + self._selector_input._as_variant_tensor(), [ + data_input._as_variant_tensor() + for data_input in self._data_inputs + ], **dataset_ops.flat_structure(self))) # pylint: enable=protected-access def _inputs(self): diff --git a/tensorflow/contrib/data/python/ops/optimization.py 
b/tensorflow/contrib/data/python/ops/optimization.py index 3eb172acd5..7f5ce97228 100644 --- a/tensorflow/contrib/data/python/ops/optimization.py +++ b/tensorflow/contrib/data/python/ops/optimization.py @@ -17,12 +17,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import gen_experimental_dataset_ops # A constant that can be used to enable auto-tuning. AUTOTUNE = -1 @@ -97,7 +96,7 @@ class _AssertNextDataset(dataset_ops.UnaryDataset): transformations, dtype=dtypes.string, name="transformations") def _as_variant_tensor(self): - return contrib_gen_dataset_ops.assert_next_dataset( + return gen_experimental_dataset_ops.experimental_assert_next_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access self._transformations, **dataset_ops.flat_structure(self)) diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 58395879e6..46f82e453a 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -19,8 +19,6 @@ from __future__ import print_function import warnings -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest @@ -32,7 +30,8 @@ from tensorflow.python.framework import function 
from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import gen_dataset_ops as core_gen_dataset_ops +from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops from tensorflow.python.ops import resource_variable_ops @@ -64,7 +63,7 @@ def function_buffering_resource(string_arg, """ if shared_name is None: shared_name = "" - return gen_dataset_ops.function_buffering_resource( + return ged_ops.experimental_function_buffering_resource( string_arg=string_arg, target_device=target_device, shared_name=shared_name, @@ -78,14 +77,14 @@ def function_buffering_resource(string_arg, def function_buffering_resource_get_next(function_buffer_resource, output_types, name=None): - return gen_dataset_ops.function_buffering_resource_get_next( + return ged_ops.experimental_function_buffering_resource_get_next( function_buffer_resource=function_buffer_resource, output_types=output_types, name=name) def function_buffering_resource_reset(function_buffer_resource, name=None): - return gen_dataset_ops.function_buffering_resource_reset( + return ged_ops.experimental_function_buffering_resource_reset( function_buffer_resource=function_buffer_resource, name=name) @@ -136,7 +135,7 @@ class _PrefetchToDeviceIterator(object): ret = remote_iterator.get_next() return nest.flatten(sparse.serialize_sparse_tensors(ret)) - iterator_device = gen_dataset_ops.iterator_get_device( + iterator_device = ged_ops.experimental_iterator_get_device( self._input_iterator._iterator_resource) with ops.device(device): @@ -162,10 +161,11 @@ class _PrefetchToDeviceIterator(object): if self._get_next_call_count > iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD: warnings.warn(iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE) - flat_ret = gen_dataset_ops.function_buffering_resource_get_next( + flat_ret = 
ged_ops.experimental_function_buffering_resource_get_next( self._buffering_resource, - output_types=nest.flatten(sparse.as_dense_types( - self.output_types, self.output_classes)), name=name) + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + name=name) ret = sparse.deserialize_sparse_tensors( nest.pack_sequence_as(self.output_types, flat_ret), @@ -219,7 +219,7 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator): buffer_size): with ops.device("/device:CPU:0"): super(_PrefetchToDeviceEagerIterator, self).__init__(input_dataset) - input_iterator_handle = core_gen_dataset_ops.iterator_to_string_handle( + input_iterator_handle = gen_dataset_ops.iterator_to_string_handle( self._resource) self._device = device @@ -238,7 +238,8 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator): self._buffering_resource = function_buffering_resource( f=_prefetch_fn, output_types=self._flat_output_types, - target_device=gen_dataset_ops.iterator_get_device(self._resource), + target_device=ged_ops.experimental_iterator_get_device( + self._resource), string_arg=input_iterator_handle, buffer_size=buffer_size, shared_name=iterator_ops._generate_shared_name( @@ -252,7 +253,7 @@ class _PrefetchToDeviceEagerIterator(iterator_ops.EagerIterator): # TODO(b/77291417): Fix with context.execution_mode(context.SYNC): with ops.device(self._device): - ret = gen_dataset_ops.function_buffering_resource_get_next( + ret = ged_ops.experimental_function_buffering_resource_get_next( function_buffer_resource=self._buffering_resource, output_types=self._flat_output_types) return sparse.deserialize_sparse_tensors( @@ -409,12 +410,12 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset): """ # pylint: disable=protected-access ds_variant = self._input_dataset._as_variant_tensor() - resource = core_gen_dataset_ops.anonymous_iterator( + resource = gen_dataset_ops.anonymous_iterator( output_types=self._flat_output_types, 
output_shapes=self._flat_output_shapes) with ops.control_dependencies( - [core_gen_dataset_ops.make_iterator(ds_variant, resource)]): - return core_gen_dataset_ops.iterator_to_string_handle(resource) + [gen_dataset_ops.make_iterator(ds_variant, resource)]): + return gen_dataset_ops.iterator_to_string_handle(resource) @function.Defun() def _remote_init_func(): @@ -463,7 +464,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset): Returns: Tensor constant 0 """ - iterator_resource = core_gen_dataset_ops.iterator_from_string_handle_v2( + iterator_resource = gen_dataset_ops.iterator_from_string_handle_v2( string_handle, output_types=self._flat_output_types, output_shapes=self._flat_output_shapes) @@ -504,7 +505,7 @@ class _CopyToDeviceDataset(dataset_ops.UnaryDataset): def _as_variant_tensor(self): with ops.device(self._target_device): - return core_gen_dataset_ops.generator_dataset( + return gen_dataset_ops.generator_dataset( self._init_captured_args, self._next_captured_args, self._finalize_captured_args, diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index d9d06e2703..360971e200 100644 --- a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -23,7 +23,6 @@ import csv import numpy as np from tensorflow.contrib.data.python.ops import batching -from tensorflow.contrib.data.python.ops import gen_dataset_ops as contrib_gen_dataset_ops from tensorflow.contrib.data.python.ops import interleave_ops from tensorflow.contrib.data.python.ops import optimization from tensorflow.contrib.data.python.ops import parsing_ops @@ -38,6 +37,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.lib.io import file_io from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import gen_experimental_dataset_ops from tensorflow.python.platform import gfile from tensorflow.python.util import 
deprecation @@ -629,7 +629,7 @@ class CsvDataset(dataset_ops.DatasetSource): def _as_variant_tensor(self): # Constructs graph node for the dataset op. - return contrib_gen_dataset_ops.csv_dataset( + return gen_experimental_dataset_ops.experimental_csv_dataset( filenames=self._filenames, record_defaults=self._record_defaults, buffer_size=self._buffer_size, @@ -1013,7 +1013,7 @@ class LMDBDataset(dataset_ops.DatasetSource): filenames, dtype=dtypes.string, name="filenames") def _as_variant_tensor(self): - return contrib_gen_dataset_ops.lmdb_dataset( + return gen_experimental_dataset_ops.experimental_lmdb_dataset( self._filenames, output_types=nest.flatten(self.output_types), output_shapes=nest.flatten(self.output_shapes)) diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py index 9d165ad52a..f73c3fd9cb 100644 --- a/tensorflow/contrib/data/python/ops/threadpool.py +++ b/tensorflow/contrib/data/python/ops/threadpool.py @@ -19,10 +19,9 @@ from __future__ import print_function import threading -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.eager import context +from tensorflow.python.ops import gen_experimental_dataset_ops as ged_ops from tensorflow.python.ops import resource_variable_ops _uid_counter = 0 @@ -47,7 +46,7 @@ class PrivateThreadPool(object): """Creates a `PrivateThreadPool` with the given number of threads.""" if context.executing_eagerly(): shared_name = _generate_shared_name("privatethreadpool") - self._resource = gen_dataset_ops.thread_pool_handle( + self._resource = ged_ops.experimental_thread_pool_handle( num_threads=num_threads, max_intra_op_parallelism=max_intra_op_parallelism, display_name=display_name, @@ -55,7 +54,7 @@ class PrivateThreadPool(object): self._resource_deleter = 
resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device=context.context().device_name) else: - self._resource = gen_dataset_ops.thread_pool_handle( + self._resource = ged_ops.experimental_thread_pool_handle( num_threads=num_threads, max_intra_op_parallelism=max_intra_op_parallelism, display_name=display_name) @@ -70,7 +69,7 @@ class _ThreadPoolDataset(dataset_ops.UnaryDataset): self._thread_pool = thread_pool def _as_variant_tensor(self): - return gen_dataset_ops.thread_pool_dataset( + return ged_ops.experimental_thread_pool_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access self._thread_pool._resource, # pylint: disable=protected-access **dataset_ops.flat_structure(self)) diff --git a/tensorflow/contrib/data/python/ops/unique.py b/tensorflow/contrib/data/python/ops/unique.py index bad67a580d..ed363a7090 100644 --- a/tensorflow/contrib/data/python/ops/unique.py +++ b/tensorflow/contrib/data/python/ops/unique.py @@ -17,10 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.data.python.ops import contrib_op_loader # pylint: disable=unused-import -from tensorflow.contrib.data.python.ops import gen_dataset_ops from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes +from tensorflow.python.ops import gen_experimental_dataset_ops def unique(): @@ -61,7 +60,7 @@ class _UniqueDataset(dataset_ops.UnaryDataset): "`tf.int32`, `tf.int64`, or `tf.string` component.") def _as_variant_tensor(self): - return gen_dataset_ops.unique_dataset( + return gen_experimental_dataset_ops.experimental_unique_dataset( self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access **dataset_ops.flat_structure(self)) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ca247dc56b..50fe308b73 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1039,6 +1039,7 @@ 
tf_gen_op_libs( "dataset_ops", "decode_proto_ops", "encode_proto_ops", + "experimental_dataset_ops", "function_ops", "functional_ops", "image_ops", @@ -1169,6 +1170,7 @@ cc_library( ":dataset_ops_op_lib", ":decode_proto_ops_op_lib", ":encode_proto_ops_op_lib", + ":experimental_dataset_ops_op_lib", ":function_ops_op_lib", ":functional_ops_op_lib", ":image_ops_op_lib", diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt new file mode 100644 index 0000000000..fa8fc96bb2 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalAssertNextDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ExperimentalAssertNextDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt new file mode 100644 index 0000000000..5fd88e7a0c --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalCSVDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ExperimentalCSVDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt new file mode 100644 index 0000000000..ac1f9719fe --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_ExperimentalDirectedInterleaveDataset.pbtxt @@ -0,0 +1,21 @@ +op { + graph_op_name: "ExperimentalDirectedInterleaveDataset" + in_arg { + name: "selector_input_dataset" + description: <